feedstock 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +3 -0
- data/LICENSE +24 -0
- data/README.md +41 -0
- data/default.xml +27 -0
- data/feedstock.gemspec +31 -0
- data/lib/feedstock.rb +100 -0
- data/lib/feedstock/version.rb +5 -0
- metadata +122 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 1c87214f9b1fca1686eb03c1364b62f1d8c716e9998c695edb13273e4bc4f3bd
|
4
|
+
data.tar.gz: 34dce9c661854f0225e9036102b3c52cf205fd750a454bc42e54b538670fe2d1
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 0bcd9b33a1216473daedd4cefab2506e7cf2a763345c6b94d5fb37e0cf6876142cf1269a4229c53d8b5d1118663ad6f4350787e818aefee52d441b3b27fe1581
|
7
|
+
data.tar.gz: da15a4cfd0c7df236ce98fa6e580c268b05ad5ee4d9750fe8beaa0c88a48711790dd4e73dd2c91d5fee870b3e0846816e64b1c4220f7c579b3f3f78b8083f330
|
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
This is free and unencumbered software released into the public domain.
|
2
|
+
|
3
|
+
Anyone is free to copy, modify, publish, use, compile, sell, or
|
4
|
+
distribute this software, either in source code form or as a compiled
|
5
|
+
binary, for any purpose, commercial or non-commercial, and by any
|
6
|
+
means.
|
7
|
+
|
8
|
+
In jurisdictions that recognize copyright laws, the author or authors
|
9
|
+
of this software dedicate any and all copyright interest in the
|
10
|
+
software to the public domain. We make this dedication for the benefit
|
11
|
+
of the public at large and to the detriment of our heirs and
|
12
|
+
successors. We intend this dedication to be an overt act of
|
13
|
+
relinquishment in perpetuity of all present and future rights to this
|
14
|
+
software under copyright law.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
19
|
+
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
20
|
+
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
21
|
+
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
22
|
+
OTHER DEALINGS IN THE SOFTWARE.
|
23
|
+
|
24
|
+
For more information, please refer to <http://unlicense.org>
|
data/README.md
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
# Feedstock
|
2
|
+
|
3
|
+
Feedstock is a Ruby library for extracting information from a webpage and
|
4
|
+
converting it into an Atom feed.
|
5
|
+
|
6
|
+
## Rationale
|
7
|
+
|
8
|
+
Feeds are great. But sometimes a website doesn't provide a feed or doesn't
|
9
|
+
provide a feed for the specific content that you want. That's where Feedstock
|
10
|
+
can help.
|
11
|
+
|
12
|
+
Feedstock is a Ruby library that you can use to create an Atom feed. It takes a
|
13
|
+
URL to the webpage to check and a hash of rules. The rules tell Feedstock how to
|
14
|
+
extract and transform the data it finds on the webpage.
|
15
|
+
|
16
|
+
## Installation
|
17
|
+
|
18
|
+
Feedstock is available as a gem:
|
19
|
+
|
20
|
+
```shell
|
21
|
+
$ gem install feedstock
|
22
|
+
```
|
23
|
+
|
24
|
+
## Bugs
|
25
|
+
|
26
|
+
Found a bug? I'd love to know about it. The best way is to report it in the
|
27
|
+
[Issues section][ghi] on GitHub.
|
28
|
+
|
29
|
+
[ghi]: https://github.com/pyrmont/feedstock/issues
|
30
|
+
|
31
|
+
## Versioning
|
32
|
+
|
33
|
+
Feedstock uses [Semantic Versioning 2.0.0][sv2].
|
34
|
+
|
35
|
+
[sv2]: http://semver.org/
|
36
|
+
|
37
|
+
## Licence
|
38
|
+
|
39
|
+
Feedstock is released into the public domain. See [LICENSE.md][lc] for more details.
|
40
|
+
|
41
|
+
[lc]: https://github.com/pyrmont/feedstock/blob/master/LICENSE.md
|
data/default.xml
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
<?xml version="1.0" encoding="utf-8"?>
|
2
|
+
<feed xmlns="http://www.w3.org/2005/Atom">
|
3
|
+
<%- info.each do |name, value| -%>
|
4
|
+
<%- case name -%>
|
5
|
+
<%- when "author" -%>
|
6
|
+
<author><name><%= value %></name></author>
|
7
|
+
<%- when "link" -%>
|
8
|
+
<<%= name %> href="<%= value %>" rel="alternate" />
|
9
|
+
<%- else -%>
|
10
|
+
<<%= name %>><%= value %></<%= name %>>
|
11
|
+
<%- end -%>
|
12
|
+
<%- end -%>
|
13
|
+
<%- entries.each do |entry| -%>
|
14
|
+
<entry>
|
15
|
+
<%- entry.each do |name, value| -%>
|
16
|
+
<%- case name -%>
|
17
|
+
<%- when "author" -%>
|
18
|
+
<author><name><%= value %></name></author>
|
19
|
+
<%- when "link" -%>
|
20
|
+
<<%= name %> href="<%= value %>" rel="alternate" />
|
21
|
+
<%- else -%>
|
22
|
+
<<%= name %>><%= value %></<%= name %>>
|
23
|
+
<%- end -%>
|
24
|
+
<%- end -%>
|
25
|
+
</entry>
|
26
|
+
<%- end -%>
|
27
|
+
</feed>
|
data/feedstock.gemspec
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "./lib/feedstock/version"
|
4
|
+
|
5
|
+
# Gem specification for Feedstock. The version comes from Feedstock::VERSION,
# which the gemspec loads from lib/feedstock/version before this block runs.
Gem::Specification.new do |spec|
  # Identity and authorship.
  spec.name = "feedstock"
  spec.version = Feedstock::VERSION
  spec.authors = ["Michael Camilleri"]
  spec.email = ["mike@inqk.net"]

  # Human-readable descriptions shown by RubyGems.
  spec.summary = "A library for creating RSS feeds from webpages"
  spec.description = [
    "Feedstock is a library for extracting information from a webpage and",
    "transforming it into an Atom feed."
  ].join(" ")
  spec.homepage = "https://github.com/pyrmont/feedstock/"
  spec.licenses = "Unlicense"
  spec.required_ruby_version = ">= 2.5"

  # Files packaged into the gem, resolved relative to the working directory.
  spec.files = Dir["Gemfile", "default.xml", "LICENSE", "README.md",
                   "feedstock.gemspec", "lib/feedstock.rb", "lib/**/*.rb"]
  spec.require_paths = ["lib"]

  # Restrict `gem push` to the public RubyGems host.
  spec.metadata["allowed_push_host"] = "https://rubygems.org"

  # Needed at runtime by lib/feedstock.rb.
  spec.add_runtime_dependency "nokogiri"
  spec.add_runtime_dependency "timeliness"

  # Needed only when developing the gem.
  spec.add_development_dependency "minitest"
  spec.add_development_dependency "rake"
  spec.add_development_dependency "warning"
end
|
data/lib/feedstock.rb
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "erb"
|
4
|
+
require "nokogiri"
|
5
|
+
require "open-uri"
|
6
|
+
require "timeliness"
|
7
|
+
|
8
|
+
# Feedstock turns a webpage into an Atom feed. A page is downloaded, values
# are extracted from it according to a hash of rules, and the result is
# rendered through an ERB template.
#
# Rules are grouped into two categories, "info" (feed-level values) and
# "entries" (per-entry values). Each rule is either a CSS selector string or a
# Hash that may contain the keys "path", "literal", "repeat", "attribute",
# "type", "prepend" and "append".
module Feedstock
  # Builds a feed for the page at +url+.
  #
  # @param url [String] location of the page to read
  # @param rules [Hash] extraction rules with "info" and "entries" categories
  # @param template_file [String] path to the ERB template used for rendering
  # @return [String] the rendered feed
  def self.feed(url, rules, template_file = "#{__dir__}/../default.xml")
    rules = normalise_rules rules
    page = download_page url
    info = extract_info page, rules
    entries = extract_entries page, rules

    create_feed info, entries, template_file
  end

  # Renders the template with +info+ and +entries+ exposed as local variables.
  #
  # @param info [Hash] feed-level values
  # @param entries [Array<Hash>] one hash of values per entry
  # @param template_file [String] path to the ERB template
  # @return [String] the rendered template
  def self.create_feed(info, entries, template_file)
    template = ERB.new File.read(template_file), trim_mode: "-"
    template.result_with_hash info: info, entries: entries
  end

  # Fetches +url+ and parses the response as HTML.
  #
  # URI.open is called explicitly because open-uri no longer makes
  # Kernel#open accept URLs on Ruby 3.0 and later. The block form closes the
  # underlying handle once Nokogiri has read the document.
  #
  # @param url [String] location of the page to read
  # @return [Nokogiri::HTML::Document] the parsed page
  def self.download_page(url)
    URI.open(url) { |io| Nokogiri::HTML(io) }
  end

  # Extracts the per-entry values described by rules["entries"].
  #
  # A rule with "literal" or "repeat" yields one value shared by every entry;
  # any other rule yields one value per node matched by its "path", and the
  # i-th match is stored in the i-th entry.
  #
  # @param page [#css, #at_css] the parsed page
  # @param rules [Hash] normalised rules
  # @return [Array<Hash>] one hash of name => value per entry
  def self.extract_entries(page, rules)
    static = {}
    entries = []

    rules["entries"].each do |name, rule|
      if rule["literal"]
        static[name] = rule["literal"]
      elsif rule["repeat"]
        static[name] = format_content page.at_css(rule["path"]), rule
      else
        page.css(rule["path"]).each_with_index do |match, i|
          entries[i] ||= {}
          entries[i][name] = format_content(match, rule)
        end
      end
    end

    # Shared values are merged last, so they are only applied to entries that
    # were created by at least one per-match rule.
    entries.each { |entry| entry.merge!(static) } unless static.empty?

    entries
  end

  # Extracts the feed-level values described by rules["info"].
  #
  # @param page [#at_css] the parsed page
  # @param rules [Hash] normalised rules
  # @return [Hash] name => value for each info rule
  def self.extract_info(page, rules)
    rules["info"].each_with_object({}) do |(name, rule), info|
      info[name] = if rule["literal"]
                     rule["literal"]
                   else
                     format_content page.at_css(rule["path"]), rule
                   end
    end
  end

  # Converts a matched node into the text that will appear in the feed.
  #
  # @param match [#[], #content, #inner_html, nil] the matched node, or nil
  #   when the selector matched nothing
  # @param rule [Hash] the rule that produced the match
  # @return [String, nil] "" when there is no match; otherwise the wrapped
  #   value (nil is possible when the requested attribute is absent and the
  #   rule has no "prepend"/"append")
  def self.format_content(match, rule)
    return "" if match.nil?

    text = if rule["attribute"]
             match[rule["attribute"]]
           else
             match.content.strip
           end

    case rule["type"]
    when "cdata"
      # CDATA content uses the node's markup rather than its text.
      "<![CDATA[#{wrap_content(match.inner_html, rule)}]]>"
    when "datetime"
      # Interpolation turns an unparseable date (nil) into an empty string.
      "#{Timeliness.parse(wrap_content(text, rule))&.iso8601}"
    else
      wrap_content text, rule
    end
  end

  # Expands shorthand rules so every rule is a Hash.
  #
  # A rule given as a bare value (typically a CSS selector string) becomes
  # { "path" => value }. The argument is not modified; a new Hash is returned,
  # so frozen or shared rule sets can be passed safely.
  #
  # @param rules [Hash{String => Hash}] rules grouped by category
  # @return [Hash{String => Hash}] a normalised copy of +rules+
  def self.normalise_rules(rules)
    rules.transform_values do |category|
      category.transform_values do |rule|
        rule.is_a?(Hash) ? rule : { "path" => rule }
      end
    end
  end

  # Surrounds +content+ with the rule's "prepend" and "append" strings.
  #
  # @param content [String, nil] the extracted value
  # @param rule [Hash] the rule being applied
  # @return [String, nil] +content+ unchanged when the rule has neither key,
  #   otherwise a new string with the affixes applied
  def self.wrap_content(content, rule)
    return content unless rule["prepend"] || rule["append"]

    "#{rule["prepend"]}#{content}#{rule["append"]}"
  end
end
|
metadata
ADDED
@@ -0,0 +1,122 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: feedstock
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Michael Camilleri
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2019-09-18 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: nokogiri
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: timeliness
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: minitest
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rake
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: warning
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
description: Feedstock is a library for extracting information from a webpage and
|
84
|
+
transforming it into an Atom feed.
|
85
|
+
email:
|
86
|
+
- mike@inqk.net
|
87
|
+
executables: []
|
88
|
+
extensions: []
|
89
|
+
extra_rdoc_files: []
|
90
|
+
files:
|
91
|
+
- Gemfile
|
92
|
+
- LICENSE
|
93
|
+
- README.md
|
94
|
+
- default.xml
|
95
|
+
- feedstock.gemspec
|
96
|
+
- lib/feedstock.rb
|
97
|
+
- lib/feedstock/version.rb
|
98
|
+
homepage: https://github.com/pyrmont/feedstock/
|
99
|
+
licenses:
|
100
|
+
- Unlicense
|
101
|
+
metadata:
|
102
|
+
allowed_push_host: https://rubygems.org
|
103
|
+
post_install_message:
|
104
|
+
rdoc_options: []
|
105
|
+
require_paths:
|
106
|
+
- lib
|
107
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
108
|
+
requirements:
|
109
|
+
- - ">="
|
110
|
+
- !ruby/object:Gem::Version
|
111
|
+
version: '2.5'
|
112
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
113
|
+
requirements:
|
114
|
+
- - ">="
|
115
|
+
- !ruby/object:Gem::Version
|
116
|
+
version: '0'
|
117
|
+
requirements: []
|
118
|
+
rubygems_version: 3.0.3
|
119
|
+
signing_key:
|
120
|
+
specification_version: 4
|
121
|
+
summary: A library for creating RSS feeds from webpages
|
122
|
+
test_files: []
|