feedjira 3.2.5 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. checksums.yaml +4 -4
  2. data/.github/copilot-instructions.md +176 -0
  3. data/.github/workflows/ruby.yml +1 -1
  4. data/.gitignore +1 -0
  5. data/.rubocop.yml +5 -3
  6. data/.rubocop_todo.yml +360 -4
  7. data/CHANGELOG.md +14 -0
  8. data/Gemfile +3 -3
  9. data/feedjira.gemspec +2 -1
  10. data/lib/feedjira/feed_entry_utilities.rb +6 -3
  11. data/lib/feedjira/feed_utilities.rb +2 -2
  12. data/lib/feedjira/parser/atom_youtube.rb +1 -0
  13. data/lib/feedjira/parser/google_docs_atom.rb +1 -0
  14. data/lib/feedjira/parser/json_feed_item.rb +2 -2
  15. data/lib/feedjira/parser/podlove_chapter.rb +1 -0
  16. data/lib/feedjira/parser/rss.rb +1 -0
  17. data/lib/feedjira/parser/rss_feed_burner.rb +1 -0
  18. data/lib/feedjira/preprocessor.rb +1 -1
  19. data/lib/feedjira/util/parse_time.rb +52 -0
  20. data/lib/feedjira/util.rb +7 -0
  21. data/lib/feedjira/version.rb +1 -1
  22. data/lib/feedjira.rb +2 -1
  23. data/spec/feedjira/atom_entry_utilities_spec.rb +50 -0
  24. data/spec/feedjira/feed_utilities_entry_spec.rb +75 -3
  25. data/spec/feedjira/feed_utilities_spec.rb +19 -0
  26. data/spec/feedjira/parser/atom_entry_spec.rb +2 -2
  27. data/spec/feedjira/parser/atom_feed_burner_entry_spec.rb +1 -1
  28. data/spec/feedjira/parser/atom_google_alerts_entry_spec.rb +19 -4
  29. data/spec/feedjira/parser/atom_youtube_entry_spec.rb +2 -2
  30. data/spec/feedjira/parser/google_docs_atom_spec.rb +36 -0
  31. data/spec/feedjira/parser/i_tunes_rss_category_spec.rb +40 -0
  32. data/spec/feedjira/parser/i_tunes_rss_item_spec.rb +1 -1
  33. data/spec/feedjira/parser/json_feed_item_spec.rb +37 -1
  34. data/spec/feedjira/parser/podlove_chapter_spec.rb +15 -7
  35. data/spec/feedjira/parser/rss_entry_spec.rb +37 -1
  36. data/spec/feedjira/parser/rss_feed_burner_entry_spec.rb +1 -1
  37. data/spec/feedjira/{core_ext/time_spec.rb → util/parse_time_spec.rb} +11 -11
  38. data/spec/feedjira_spec.rb +10 -10
  39. data/spec/support/coverage.rb +1 -1
  40. metadata +30 -9
  41. data/lib/feedjira/core_ext/date.rb +0 -19
  42. data/lib/feedjira/core_ext/string.rb +0 -11
  43. data/lib/feedjira/core_ext/time.rb +0 -38
  44. data/lib/feedjira/core_ext.rb +0 -5
@@ -42,7 +42,7 @@ describe Feedjira::Parser::RSSEntry do
42
42
  end
43
43
 
44
44
  it "parses the published date" do
45
- published = Time.parse_safely "Thu Dec 04 17:17:49 UTC 2008"
45
+ published = Feedjira::Util::ParseTime.call "Thu Dec 04 17:17:49 UTC 2008"
46
46
  expect(@entry.published).to eq published
47
47
  end
48
48
 
@@ -122,4 +122,40 @@ describe Feedjira::Parser::RSSEntry do
122
122
  feed = Feedjira.parse(sample_rss_feed_with_comments)
123
123
  expect(feed.entries[0].comments).to eq "https://news.ycombinator.com/item?id=30937433"
124
124
  end
125
+
126
+ it "returns nil when no URL is available from link or guid" do
127
+ xml = <<~XML
128
+ <rss version="2.0">
129
+ <channel>
130
+ <item>
131
+ <title>Entry without URL</title>
132
+ <description>This entry has no link or guid</description>
133
+ </item>
134
+ </channel>
135
+ </rss>
136
+ XML
137
+
138
+ feed = Feedjira.parse(xml)
139
+ entry = feed.entries.first
140
+
141
+ expect(entry.url).to be_nil
142
+ end
143
+
144
+ it "returns nil when guid exists but is not a permalink" do
145
+ xml = <<~XML
146
+ <rss version="2.0">
147
+ <channel>
148
+ <item>
149
+ <title>Entry with non-permalink GUID</title>
150
+ <guid isPermaLink="false">some-guid-123</guid>
151
+ </item>
152
+ </channel>
153
+ </rss>
154
+ XML
155
+
156
+ feed = Feedjira.parse(xml)
157
+ entry = feed.entries.first
158
+
159
+ expect(entry.url).to be_nil
160
+ end
125
161
  end
@@ -43,7 +43,7 @@ describe Feedjira::Parser::RSSFeedBurnerEntry do
43
43
  end
44
44
 
45
45
  it "parses the published date" do
46
- published = Time.parse_safely "Wed Nov 02 17:25:27 UTC 2011"
46
+ published = Feedjira::Util::ParseTime.call "Wed Nov 02 17:25:27 UTC 2011"
47
47
  expect(@entry.published).to eq published
48
48
  end
49
49
 
@@ -2,44 +2,44 @@
2
2
 
3
3
  require "spec_helper"
4
4
 
5
- RSpec.describe Time do
6
- describe "#parse_safely" do
5
+ RSpec.describe Feedjira::Util::ParseTime do
6
+ describe ".call" do
7
7
  it "returns the datetime in utc when given a Time" do
8
- time = described_class.now
8
+ time = Time.now
9
9
 
10
- expect(described_class.parse_safely(time)).to eq(time.utc)
10
+ expect(described_class.call(time)).to eq(time.utc)
11
11
  end
12
12
 
13
13
  it "returns the datetime in utc when given a Date" do
14
14
  date = Date.today
15
15
 
16
- expect(described_class.parse_safely(date)).to eq(date.to_time.utc)
16
+ expect(described_class.call(date)).to eq(date.to_time.utc)
17
17
  end
18
18
 
19
19
  it "returns the datetime in utc when given a String" do
20
20
  timestamp = "2016-01-01 00:00:00"
21
21
 
22
- expect(described_class.parse_safely(timestamp)).to eq(described_class.parse(timestamp).utc)
22
+ expect(described_class.call(timestamp)).to eq(Time.parse(timestamp).utc)
23
23
  end
24
24
 
25
25
  it "returns nil when given an empty String" do
26
26
  timestamp = ""
27
27
 
28
- expect(described_class.parse_safely(timestamp)).to be_nil
28
+ expect(described_class.call(timestamp)).to be_nil
29
29
  end
30
30
 
31
31
  it "returns the the datetime in utc given a 14-digit time" do
32
- time = described_class.now.utc
32
+ time = Time.now.utc
33
33
  timestamp = time.strftime("%Y%m%d%H%M%S")
34
34
 
35
- expect(described_class.parse_safely(timestamp)).to eq(time.floor)
35
+ expect(described_class.call(timestamp)).to eq(time.floor)
36
36
  end
37
37
 
38
38
  context "when given an invalid time string" do
39
39
  it "returns nil" do
40
40
  timestamp = "2016-51-51 00:00:00"
41
41
 
42
- expect(described_class.parse_safely(timestamp)).to be_nil
42
+ expect(described_class.call(timestamp)).to be_nil
43
43
  end
44
44
 
45
45
  it "logs an error" do
@@ -50,7 +50,7 @@ RSpec.describe Time do
50
50
  expect(Feedjira.logger)
51
51
  .to receive(:debug).with(an_instance_of(ArgumentError))
52
52
 
53
- described_class.parse_safely(timestamp)
53
+ described_class.call(timestamp)
54
54
  end
55
55
  end
56
56
  end
@@ -10,7 +10,7 @@ RSpec.describe Feedjira do
10
10
  feed = described_class.parse(sample_rss_feed, parser: parser)
11
11
 
12
12
  expect(feed.title).to eq "Tender Lovemaking"
13
- published = Time.parse_safely "Thu Dec 04 17:17:49 UTC 2008"
13
+ published = Feedjira::Util::ParseTime.call "Thu Dec 04 17:17:49 UTC 2008"
14
14
  expect(feed.entries.first.published).to eq published
15
15
  expect(feed.entries.size).to eq 10
16
16
  end
@@ -20,7 +20,7 @@ RSpec.describe Feedjira do
20
20
  it "parses an rdf feed" do
21
21
  feed = described_class.parse(sample_rdf_feed)
22
22
  expect(feed.title).to eq "HREF Considered Harmful"
23
- published = Time.parse_safely("Tue Sep 02 19:50:07 UTC 2008")
23
+ published = Feedjira::Util::ParseTime.call("Tue Sep 02 19:50:07 UTC 2008")
24
24
  expect(feed.entries.first.published).to eq published
25
25
  expect(feed.entries.size).to eq 10
26
26
  end
@@ -28,7 +28,7 @@ RSpec.describe Feedjira do
28
28
  it "parses an rss feed" do
29
29
  feed = described_class.parse(sample_rss_feed)
30
30
  expect(feed.title).to eq "Tender Lovemaking"
31
- published = Time.parse_safely "Thu Dec 04 17:17:49 UTC 2008"
31
+ published = Feedjira::Util::ParseTime.call "Thu Dec 04 17:17:49 UTC 2008"
32
32
  expect(feed.entries.first.published).to eq published
33
33
  expect(feed.entries.size).to eq 10
34
34
  end
@@ -36,7 +36,7 @@ RSpec.describe Feedjira do
36
36
  it "parses an atom feed" do
37
37
  feed = described_class.parse(sample_atom_feed)
38
38
  expect(feed.title).to eq "Amazon Web Services Blog"
39
- published = Time.parse_safely "Fri Jan 16 18:21:00 UTC 2009"
39
+ published = Feedjira::Util::ParseTime.call "Fri Jan 16 18:21:00 UTC 2009"
40
40
  expect(feed.entries.first.published).to eq published
41
41
  expect(feed.entries.size).to eq 10
42
42
  end
@@ -44,7 +44,7 @@ RSpec.describe Feedjira do
44
44
  it "parses an feedburner atom feed" do
45
45
  feed = described_class.parse(sample_feedburner_atom_feed)
46
46
  expect(feed.title).to eq "Paul Dix Explains Nothing"
47
- published = Time.parse_safely "Thu Jan 22 15:50:22 UTC 2009"
47
+ published = Feedjira::Util::ParseTime.call "Thu Jan 22 15:50:22 UTC 2009"
48
48
  expect(feed.entries.first.published).to eq published
49
49
  expect(feed.entries.size).to eq 5
50
50
  end
@@ -52,7 +52,7 @@ RSpec.describe Feedjira do
52
52
  it "parses an itunes feed" do
53
53
  feed = described_class.parse(sample_itunes_feed)
54
54
  expect(feed.title).to eq "All About Everything"
55
- published = Time.parse_safely "Wed, 15 Jun 2005 19:00:00 GMT"
55
+ published = Feedjira::Util::ParseTime.call "Wed, 15 Jun 2005 19:00:00 GMT"
56
56
  expect(feed.entries.first.published).to eq published
57
57
  expect(feed.entries.size).to eq 3
58
58
  end
@@ -60,7 +60,7 @@ RSpec.describe Feedjira do
60
60
  it "parses an itunes feedburner feed" do
61
61
  feed = described_class.parse(sample_itunes_feedburner_feed)
62
62
  expect(feed.title).to eq "Welcome to Night Vale"
63
- published = Time.parse_safely "2023-09-22 16:30:15 UTC"
63
+ published = Feedjira::Util::ParseTime.call "2023-09-22 16:30:15 UTC"
64
64
  expect(feed.entries.first.published).to eq published
65
65
  expect(feed.entries.size).to eq 3
66
66
  url = "https://www.podtrac.com/pts/redirect.mp3/dovetail.prxu.org/_/126/e3dafc45-a202-42d0-a55b-216e733a2d7a/2023_09_17_BTS_Episode_EXCERPT_v2.mp3"
@@ -80,7 +80,7 @@ RSpec.describe Feedjira do
80
80
 
81
81
  feed = described_class.parse(sample_invalid_date_format_feed)
82
82
  expect(feed.title).to eq "Invalid date format feed"
83
- published = Time.parse_safely "Mon, 16 Oct 2017 15:10:00 GMT"
83
+ published = Feedjira::Util::ParseTime.call "Mon, 16 Oct 2017 15:10:00 GMT"
84
84
  expect(feed.entries.first.published).to eq published
85
85
  expect(feed.entries.size).to eq 2
86
86
  end
@@ -97,7 +97,7 @@ RSpec.describe Feedjira do
97
97
  it "parses an feedburner rss feed" do
98
98
  feed = described_class.parse(sample_rss_feed_burner_feed)
99
99
  expect(feed.title).to eq "TechCrunch"
100
- published = Time.parse_safely "Wed Nov 02 17:25:27 UTC 2011"
100
+ published = Feedjira::Util::ParseTime.call "Wed Nov 02 17:25:27 UTC 2011"
101
101
  expect(feed.entries.first.published).to eq published
102
102
  expect(feed.entries.size).to eq 20
103
103
  end
@@ -106,7 +106,7 @@ RSpec.describe Feedjira do
106
106
  feed = described_class.parse(sample_rss_feed_with_a10_namespace)
107
107
  expect(feed.url).to eq "http://www.example.com/"
108
108
  expect(feed.entries.first.url).to eq "http://www.example.com/5"
109
- expect(feed.entries.first.updated).to eq Time.parse_safely("2020-05-14T10:00:18Z")
109
+ expect(feed.entries.first.updated).to eq Feedjira::Util::ParseTime.call("2020-05-14T10:00:18Z")
110
110
  expect(feed.entries.first.author).to eq "John Doe"
111
111
  expect(feed.entries.size).to eq 5
112
112
  end
@@ -7,4 +7,4 @@ SimpleCov.start do
7
7
  add_filter "_spec.rb"
8
8
  end
9
9
 
10
- SimpleCov.minimum_coverage(line: 98, branch: 75)
10
+ SimpleCov.minimum_coverage(line: 100, branch: 100)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: feedjira
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.2.5
4
+ version: 4.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Adam Hess
@@ -13,8 +13,28 @@ authors:
13
13
  - Paul Dix
14
14
  bindir: bin
15
15
  cert_chain: []
16
- date: 2025-04-11 00:00:00.000000000 Z
16
+ date: 1980-01-02 00:00:00.000000000 Z
17
17
  dependencies:
18
+ - !ruby/object:Gem::Dependency
19
+ name: logger
20
+ requirement: !ruby/object:Gem::Requirement
21
+ requirements:
22
+ - - ">="
23
+ - !ruby/object:Gem::Version
24
+ version: '1.0'
25
+ - - "<"
26
+ - !ruby/object:Gem::Version
27
+ version: '2'
28
+ type: :runtime
29
+ prerelease: false
30
+ version_requirements: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: '1.0'
35
+ - - "<"
36
+ - !ruby/object:Gem::Version
37
+ version: '2'
18
38
  - !ruby/object:Gem::Dependency
19
39
  name: loofah
20
40
  requirement: !ruby/object:Gem::Requirement
@@ -61,6 +81,7 @@ extra_rdoc_files: []
61
81
  files:
62
82
  - ".github/ISSUE_TEMPLATE/feed-parsing.md"
63
83
  - ".github/ISSUE_TEMPLATE/general-issue.md"
84
+ - ".github/copilot-instructions.md"
64
85
  - ".github/dependabot.yml"
65
86
  - ".github/workflows/ruby.yml"
66
87
  - ".gitignore"
@@ -77,10 +98,6 @@ files:
77
98
  - lib/feedjira.rb
78
99
  - lib/feedjira/atom_entry_utilities.rb
79
100
  - lib/feedjira/configuration.rb
80
- - lib/feedjira/core_ext.rb
81
- - lib/feedjira/core_ext/date.rb
82
- - lib/feedjira/core_ext/string.rb
83
- - lib/feedjira/core_ext/time.rb
84
101
  - lib/feedjira/feed.rb
85
102
  - lib/feedjira/feed_entry_utilities.rb
86
103
  - lib/feedjira/feed_utilities.rb
@@ -110,9 +127,11 @@ files:
110
127
  - lib/feedjira/parser/rss_image.rb
111
128
  - lib/feedjira/preprocessor.rb
112
129
  - lib/feedjira/rss_entry_utilities.rb
130
+ - lib/feedjira/util.rb
131
+ - lib/feedjira/util/parse_time.rb
113
132
  - lib/feedjira/version.rb
133
+ - spec/feedjira/atom_entry_utilities_spec.rb
114
134
  - spec/feedjira/configuration_spec.rb
115
- - spec/feedjira/core_ext/time_spec.rb
116
135
  - spec/feedjira/feed_spec.rb
117
136
  - spec/feedjira/feed_utilities_entry_spec.rb
118
137
  - spec/feedjira/feed_utilities_spec.rb
@@ -126,6 +145,7 @@ files:
126
145
  - spec/feedjira/parser/atom_youtube_spec.rb
127
146
  - spec/feedjira/parser/google_docs_atom_entry_spec.rb
128
147
  - spec/feedjira/parser/google_docs_atom_spec.rb
148
+ - spec/feedjira/parser/i_tunes_rss_category_spec.rb
129
149
  - spec/feedjira/parser/i_tunes_rss_item_spec.rb
130
150
  - spec/feedjira/parser/i_tunes_rss_owner_spec.rb
131
151
  - spec/feedjira/parser/itunes_rss_spec.rb
@@ -137,6 +157,7 @@ files:
137
157
  - spec/feedjira/parser/rss_feed_burner_spec.rb
138
158
  - spec/feedjira/parser/rss_spec.rb
139
159
  - spec/feedjira/preprocessor_spec.rb
160
+ - spec/feedjira/util/parse_time_spec.rb
140
161
  - spec/feedjira_spec.rb
141
162
  - spec/sample_feeds.rb
142
163
  - spec/sample_feeds/AmazonWebServicesBlog.xml
@@ -198,14 +219,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
198
219
  requirements:
199
220
  - - ">="
200
221
  - !ruby/object:Gem::Version
201
- version: '2.7'
222
+ version: '3.1'
202
223
  required_rubygems_version: !ruby/object:Gem::Requirement
203
224
  requirements:
204
225
  - - ">="
205
226
  - !ruby/object:Gem::Version
206
227
  version: '0'
207
228
  requirements: []
208
- rubygems_version: 3.6.2
229
+ rubygems_version: 3.6.9
209
230
  specification_version: 4
210
231
  summary: A feed parsing library
211
232
  test_files: []
@@ -1,19 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- # Date code pulled and adapted from:
4
- # Ruby Cookbook by Lucas Carlson and Leonard Richardson
5
- # Published by O'Reilly
6
- # ISBN: 0-596-52369-6
7
- class Date
8
- def feed_utils_to_gm_time
9
- feed_utils_to_time(new_offset, :gm)
10
- end
11
-
12
- private
13
-
14
- def feed_utils_to_time(dest, method)
15
- # Convert a fraction of a day to a number of microseconds
16
- usec = (dest.sec_fraction * (10**6)).to_i
17
- Time.send(method, dest.year, dest.month, dest.day, dest.hour, dest.min, dest.sec, usec)
18
- end
19
- end
@@ -1,11 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- class String
4
- def sanitize!
5
- replace(sanitize)
6
- end
7
-
8
- def sanitize
9
- Loofah.scrub_fragment(self, :prune).to_s
10
- end
11
- end
@@ -1,38 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "time"
4
- require "date"
5
-
6
- class Time
7
- # Parse a time string and convert it to UTC without raising errors.
8
- # Parses a flattened 14-digit time (YYYYmmddHHMMMSS) as UTC.
9
- #
10
- # === Parameters
11
- # [dt<String or Time>] Time definition to be parsed.
12
- #
13
- # === Returns
14
- # A Time instance in UTC or nil if there were errors while parsing.
15
- def self.parse_safely(datetime)
16
- if datetime.is_a?(Time)
17
- datetime.utc
18
- elsif datetime.respond_to?(:to_datetime)
19
- datetime.to_time.utc
20
- else
21
- parse_string_safely datetime.to_s
22
- end
23
- rescue StandardError => e
24
- Feedjira.logger.debug("Failed to parse time #{datetime}")
25
- Feedjira.logger.debug(e)
26
- nil
27
- end
28
-
29
- def self.parse_string_safely(string)
30
- return nil if string.empty?
31
-
32
- if /\A\d{14}\z/.match?(string)
33
- parse("#{string}Z", true)
34
- else
35
- parse(string).utc
36
- end
37
- end
38
- end
@@ -1,5 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require_relative "core_ext/time"
4
- require_relative "core_ext/date"
5
- require_relative "core_ext/string"