feedstock 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 1c87214f9b1fca1686eb03c1364b62f1d8c716e9998c695edb13273e4bc4f3bd
4
+ data.tar.gz: 34dce9c661854f0225e9036102b3c52cf205fd750a454bc42e54b538670fe2d1
5
+ SHA512:
6
+ metadata.gz: 0bcd9b33a1216473daedd4cefab2506e7cf2a763345c6b94d5fb37e0cf6876142cf1269a4229c53d8b5d1118663ad6f4350787e818aefee52d441b3b27fe1581
7
+ data.tar.gz: da15a4cfd0c7df236ce98fa6e580c268b05ad5ee4d9750fe8beaa0c88a48711790dd4e73dd2c91d5fee870b3e0846816e64b1c4220f7c579b3f3f78b8083f330
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source "https://rubygems.org"
2
+
3
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,24 @@
1
+ This is free and unencumbered software released into the public domain.
2
+
3
+ Anyone is free to copy, modify, publish, use, compile, sell, or
4
+ distribute this software, either in source code form or as a compiled
5
+ binary, for any purpose, commercial or non-commercial, and by any
6
+ means.
7
+
8
+ In jurisdictions that recognize copyright laws, the author or authors
9
+ of this software dedicate any and all copyright interest in the
10
+ software to the public domain. We make this dedication for the benefit
11
+ of the public at large and to the detriment of our heirs and
12
+ successors. We intend this dedication to be an overt act of
13
+ relinquishment in perpetuity of all present and future rights to this
14
+ software under copyright law.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19
+ IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20
+ OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21
+ ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22
+ OTHER DEALINGS IN THE SOFTWARE.
23
+
24
+ For more information, please refer to <http://unlicense.org>
data/README.md ADDED
@@ -0,0 +1,41 @@
1
+ # Feedstock
2
+
3
+ Feedstock is a Ruby library for extracting information from a webpage and
4
+ converting it into an Atom feed.
5
+
6
+ ## Rationale
7
+
8
+ Feeds are great. But sometimes a website doesn't provide a feed or doesn't
9
+ provide a feed for the specific content that you want. That's where Feedstock
10
+ can help.
11
+
12
+ Feedstock is a Ruby library that you can use to create an Atom feed. It takes a
13
+ URL to the webpage to check and a hash of rules. The rules tell Feedstock how to
14
+ extract and transform the data it finds on the webpage.
15
+
16
+ ## Installation
17
+
18
+ Feedstock is available as a gem:
19
+
20
+ ```shell
21
+ $ gem install feedstock
22
+ ```
23
+
24
+ ## Bugs
25
+
26
+ Found a bug? I'd love to know about it. The best way is to report it in the
27
+ [Issues section][ghi] on GitHub.
28
+
29
+ [ghi]: https://github.com/pyrmont/feedstock/issues
30
+
31
+ ## Versioning
32
+
33
+ Feedstock uses [Semantic Versioning 2.0.0][sv2].
34
+
35
+ [sv2]: http://semver.org/
36
+
37
+ ## Licence
38
+
39
+ Feedstock is released into the public domain. See [LICENSE][lc] for more details.
40
+
41
+ [lc]: https://github.com/pyrmont/feedstock/blob/master/LICENSE
data/default.xml ADDED
@@ -0,0 +1,27 @@
1
+ <?xml version="1.0" encoding="utf-8"?>
2
+ <feed xmlns="http://www.w3.org/2005/Atom">
3
+ <%- info.each do |name, value| -%>
4
+ <%- case name -%>
5
+ <%- when "author" -%>
6
+ <author><name><%= value %></name></author>
7
+ <%- when "link" -%>
8
+ <<%= name %> href="<%= value %>" rel="alternate" />
9
+ <%- else -%>
10
+ <<%= name %>><%= value %></<%= name %>>
11
+ <%- end -%>
12
+ <%- end -%>
13
+ <%- entries.each do |entry| -%>
14
+ <entry>
15
+ <%- entry.each do |name, value| -%>
16
+ <%- case name -%>
17
+ <%- when "author" -%>
18
+ <author><name><%= value %></name></author>
19
+ <%- when "link" -%>
20
+ <<%= name %> href="<%= value %>" rel="alternate" />
21
+ <%- else -%>
22
+ <<%= name %>><%= value %></<%= name %>>
23
+ <%- end -%>
24
+ <%- end -%>
25
+ </entry>
26
+ <%- end -%>
27
+ </feed>
data/feedstock.gemspec ADDED
@@ -0,0 +1,31 @@
1
# frozen_string_literal: true

# Resolve the version file relative to this gemspec rather than the current
# working directory, so the spec loads no matter where it is evaluated from.
require_relative "lib/feedstock/version"

Gem::Specification.new do |s|
  s.name = "feedstock"
  s.version = Feedstock::VERSION
  s.authors = ["Michael Camilleri"]
  s.email = ["mike@inqk.net"]
  # The library renders Atom (see the description and default.xml), not RSS.
  s.summary = "A library for creating Atom feeds from webpages"
  s.description = <<-desc.strip.gsub(/\s+/, " ")
    Feedstock is a library for extracting information from a webpage and
    transforming it into an Atom feed.
  desc
  s.homepage = "https://github.com/pyrmont/feedstock/"
  s.licenses = ["Unlicense"]
  # lib/feedstock.rb calls ERB.new with the trim_mode: keyword, which was
  # introduced in Ruby 2.6.
  s.required_ruby_version = ">= 2.6"

  s.files = Dir["Gemfile", "default.xml", "LICENSE", "README.md",
                "feedstock.gemspec", "lib/feedstock.rb", "lib/**/*.rb"]
  s.require_paths = ["lib"]

  s.metadata["allowed_push_host"] = "https://rubygems.org"

  s.add_runtime_dependency "nokogiri"
  s.add_runtime_dependency "timeliness"

  s.add_development_dependency "minitest"
  s.add_development_dependency "rake"
  s.add_development_dependency "warning"
end
data/lib/feedstock.rb ADDED
@@ -0,0 +1,100 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "erb"
4
+ require "nokogiri"
5
+ require "open-uri"
6
+ require "timeliness"
7
+
8
# Extracts data from a webpage according to a hash of rules and renders it
# through an ERB template (an Atom feed by default).
#
# Rules are grouped into two categories, "info" (feed-level values) and
# "entries" (per-entry values). Within a category each key is an element name
# and each value is either a CSS path string or a hash that may contain:
#
#   "path"      - CSS selector used to find the node(s)
#   "literal"   - fixed value used instead of looking anything up
#   "repeat"    - (entries only) take the first match and copy it to every entry
#   "attribute" - read this attribute instead of the node's text
#   "type"      - "cdata" or "datetime" for special formatting
#   "prepend" / "append" - strings placed around the extracted content
module Feedstock
  # Downloads +url+, applies +rules+ and renders the result.
  #
  # @param url [String] location of the page to read
  # @param rules [Hash] extraction rules (see the module comment)
  # @param template_file [String] path to the ERB template to render
  # @return [String] the rendered feed
  def self.feed(url, rules, template_file = "#{__dir__}/../default.xml")
    rules = normalise_rules(rules)
    page = download_page(url)
    info = extract_info(page, rules)
    entries = extract_entries(page, rules)

    create_feed(info, entries, template_file)
  end

  # Renders the template with +info+ and +entries+ exposed as local variables.
  #
  # @param info [Hash] feed-level values
  # @param entries [Array<Hash>] one hash of values per entry
  # @param template_file [String] path to the ERB template
  # @return [String] the rendered template
  def self.create_feed(info, entries, template_file)
    # trim_mode "-" lets the template use <%- and -%> to suppress whitespace.
    template = ERB.new(File.read(template_file), trim_mode: "-")
    template.result_with_hash(info: info, entries: entries)
  end

  # Fetches +url+ and parses it as HTML.
  #
  # URI.open is used because Kernel#open stopped accepting URLs in Ruby 3.0;
  # the block form closes the underlying handle once parsing has finished.
  #
  # @param url [String] location of the page to read
  # @return [Nokogiri::HTML::Document] the parsed page
  def self.download_page(url)
    URI.open(url) { |io| Nokogiri::HTML(io) }
  end

  # Builds the list of entries described by the "entries" rules.
  #
  # Ordinary rules contribute one value per matching node, with the n-th match
  # going to the n-th entry. "literal" and "repeat" rules produce a single value
  # that is merged into every entry afterwards.
  #
  # @param page [#css, #at_css] the parsed page
  # @param rules [Hash] normalised rules; a missing "entries" category is
  #   treated as empty
  # @return [Array<Hash>] one hash per entry
  def self.extract_entries(page, rules)
    static = {}
    entries = []

    (rules["entries"] || {}).each do |name, rule|
      if rule["literal"]
        static[name] = rule["literal"]
      elsif rule["repeat"]
        static[name] = format_content(page.at_css(rule["path"]), rule)
      else
        page.css(rule["path"]).each_with_index do |match, i|
          entries[i] ||= {}
          entries[i][name] = format_content(match, rule)
        end
      end
    end

    # Merging an empty hash is a no-op, so no emptiness check is needed.
    entries.each { |entry| entry.merge!(static) }
  end

  # Builds the feed-level values described by the "info" rules.
  #
  # @param page [#at_css] the parsed page
  # @param rules [Hash] normalised rules; a missing "info" category is treated
  #   as empty
  # @return [Hash] element name => value
  def self.extract_info(page, rules)
    (rules["info"] || {}).each_with_object({}) do |(name, rule), info|
      info[name] = rule["literal"] ||
                   format_content(page.at_css(rule["path"]), rule)
    end
  end

  # Converts a matched node into the string that goes into the feed.
  #
  # @param match [#content, #inner_html, #[], nil] the matched node
  # @param rule [Hash] the rule that produced the match
  # @return [String] the formatted value; "" when there is no node, when the
  #   requested attribute is absent, or when a datetime cannot be parsed
  def self.format_content(match, rule)
    return "" if match.nil?

    text = if rule["attribute"]
             # A missing attribute yields nil; coerce it so callers always
             # receive a String, matching the nil-node case above.
             match[rule["attribute"]].to_s
           else
             match.content.strip
           end

    case rule["type"]
    when "cdata"
      "<![CDATA[#{wrap_content(match.inner_html, rule)}]]>"
    when "datetime"
      parsed = Timeliness.parse(wrap_content(text, rule))
      parsed ? parsed.iso8601 : ""
    else
      wrap_content(text, rule)
    end
  end

  # Expands shorthand rules in place so that every rule is a hash.
  #
  # A rule given as a bare value (normally a CSS path string) becomes
  # { "path" => value }. The hash passed in is modified and returned.
  #
  # @param rules [Hash] category => { name => rule }
  # @return [Hash] the same +rules+ object, normalised
  def self.normalise_rules(rules)
    rules.each_value do |category|
      category.each do |name, rule|
        # Reassigning an existing key while iterating is permitted.
        category[name] = { "path" => rule } unless rule.is_a?(Hash)
      end
    end

    rules
  end

  # Surrounds +content+ with the rule's "prepend" and "append" strings.
  #
  # @param content [String] the extracted content
  # @param rule [Hash] the rule being applied
  # @return [String] +content+ unchanged when neither key is set, otherwise
  #   the wrapped string
  def self.wrap_content(content, rule)
    return content unless rule["prepend"] || rule["append"]

    "#{rule["prepend"]}#{content}#{rule["append"]}"
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module Feedstock
  # Version string of the feedstock gem; feedstock.gemspec reads this constant
  # to set the gem's version.
  VERSION = "0.1.0"
end
metadata ADDED
@@ -0,0 +1,122 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: feedstock
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Michael Camilleri
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2019-09-18 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: timeliness
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: minitest
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rake
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: warning
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ description: Feedstock is a library for extracting information from a webpage and
84
+ transforming it into an Atom feed.
85
+ email:
86
+ - mike@inqk.net
87
+ executables: []
88
+ extensions: []
89
+ extra_rdoc_files: []
90
+ files:
91
+ - Gemfile
92
+ - LICENSE
93
+ - README.md
94
+ - default.xml
95
+ - feedstock.gemspec
96
+ - lib/feedstock.rb
97
+ - lib/feedstock/version.rb
98
+ homepage: https://github.com/pyrmont/feedstock/
99
+ licenses:
100
+ - Unlicense
101
+ metadata:
102
+ allowed_push_host: https://rubygems.org
103
+ post_install_message:
104
+ rdoc_options: []
105
+ require_paths:
106
+ - lib
107
+ required_ruby_version: !ruby/object:Gem::Requirement
108
+ requirements:
109
+ - - ">="
110
+ - !ruby/object:Gem::Version
111
+ version: '2.5'
112
+ required_rubygems_version: !ruby/object:Gem::Requirement
113
+ requirements:
114
+ - - ">="
115
+ - !ruby/object:Gem::Version
116
+ version: '0'
117
+ requirements: []
118
+ rubygems_version: 3.0.3
119
+ signing_key:
120
+ specification_version: 4
121
+ summary: A library for creating RSS feeds from webpages
122
+ test_files: []