weneedfeed 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0fdaa4a0f724b7018d6b1b2b585aeaf970eff37538df7ca7aaf760fc217b56dc
4
- data.tar.gz: 9f34a11d59aaada2d5afd689263f722c0143af5e59b9741e7d6f34d16d807c57
3
+ metadata.gz: 65814d68cfaf1936de388ce721e3aee49a0e4641c0ecd6ae2afb72c858fbd8f4
4
+ data.tar.gz: ed124803cfb9589dd3b8e44169454b07623a2460a7ac2412c1946d08e51d590f
5
5
  SHA512:
6
- metadata.gz: 878189a82437b83ed25d0c25c5a6de0f4b840c1daf117ccd83d31be1ecbbe2db0d97289bd5ab93e327508562843ddea253948cad63324d3557ab28a0d5052268
7
- data.tar.gz: 8c56392e937c331456b44c869a96316c892b5321fd85611325c91c3987de079d101144998a4399f4fa668c46181e3f0c51f79fe157bf333d211fcf5ae0aeec87
6
+ metadata.gz: 486e7af49032ebf8124d6360cd835ec5ede778c4dc6911259fcee753247166ef6c53e3468bb3c386a3e2d794b3939685795c230a4ff24e65a67e615c5b39a800
7
+ data.tar.gz: 636f09f3860a1a7a16bf425facd29988e7037ae35a9ab8a11b8a37118339c9bdf750336eb01b8c36e2bee0d8b81ed2b1a9846269096428b702c09b27b873617a
@@ -9,3 +9,6 @@ Metrics:
9
9
 
10
10
  Style/Documentation:
11
11
  Enabled: false
12
+
13
+ Style/TrailingCommaInArguments:
14
+ Enabled: false
@@ -7,6 +7,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## Unreleased
9
9
 
10
+ ## 0.6.0 - 2020-11-15
11
+
12
+ ### Changed
13
+
14
+ - Change pages schema from Hash to Array.
15
+ - Require hanami-router 2.0.0.alpha3 (pinned to exactly this version).
16
+ - Ignore pubDate when item.time is not found.
17
+ - Make item_description_selector and item_time_selector optional.
18
+
10
19
  ## 0.5.0 - 2020-11-14
11
20
 
12
21
  ### Added
@@ -1,8 +1,9 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- weneedfeed (0.5.0)
4
+ weneedfeed (0.6.0)
5
5
  faraday
6
+ hanami-router (= 2.0.0.alpha3)
6
7
  hibana
7
8
  nokogiri
8
9
  rack-capture (>= 0.4.0)
@@ -14,29 +15,28 @@ GEM
14
15
  addressable (2.7.0)
15
16
  public_suffix (>= 2.0.2, < 5.0)
16
17
  ast (2.4.1)
17
- concurrent-ruby (1.1.7)
18
18
  crack (0.4.4)
19
19
  diff-lcs (1.4.4)
20
20
  faraday (1.1.0)
21
21
  multipart-post (>= 1.2, < 3)
22
22
  ruby2_keywords
23
- hanami-router (1.3.2)
24
- hanami-utils (~> 1.3)
25
- http_router (= 0.11.2)
23
+ hanami-router (2.0.0.alpha3)
24
+ mustermann (~> 1.0)
25
+ mustermann-contrib (~> 1.0)
26
26
  rack (~> 2.0)
27
- hanami-utils (1.3.6)
28
- concurrent-ruby (~> 1.0)
29
- transproc (~> 1.0)
27
+ hansi (0.2.0)
30
28
  hashdiff (1.0.1)
31
29
  hibana (0.1.1)
32
30
  hanami-router
33
31
  rack
34
32
  tilt
35
- http_router (0.11.2)
36
- rack (>= 1.0.0)
37
- url_mount (~> 0.2.1)
38
33
  mini_portile2 (2.4.0)
39
34
  multipart-post (2.1.1)
35
+ mustermann (1.1.1)
36
+ ruby2_keywords (~> 0.0.1)
37
+ mustermann-contrib (1.1.1)
38
+ hansi (~> 0.2.0)
39
+ mustermann (= 1.1.1)
40
40
  nokogiri (1.10.10)
41
41
  mini_portile2 (~> 2.4.0)
42
42
  parallel (1.19.2)
@@ -80,10 +80,7 @@ GEM
80
80
  ruby2_keywords (0.0.2)
81
81
  thor (1.0.1)
82
82
  tilt (2.0.10)
83
- transproc (1.1.1)
84
83
  unicode-display_width (1.7.0)
85
- url_mount (0.2.1)
86
- rack
87
84
  webmock (3.9.3)
88
85
  addressable (>= 2.3.6)
89
86
  crack (>= 0.3.2)
data/README.md CHANGED
@@ -33,51 +33,79 @@ You need to write a schema file named with `weneedfeed.yml` to use this gem.
33
33
 
34
34
  ```yaml
35
35
  pages:
36
- example1:
37
- title: example site 1
36
+ - id: example1
37
+ title: Example feed with CSS Selector
38
38
  url: http://example.com/1
39
39
  item_selector: li
40
40
  item_description_selector: p:nth-child(3)
41
41
  item_link_selector: a
42
42
  item_time_selector: time
43
43
  item_title_selector: p:nth-child(2)
44
- example2:
45
- title: example site 2
44
+ - id: example2
45
+ title: Example feed with XPath
46
46
  url: http://example.com/2
47
47
  item_selector: //li
48
48
  item_description_selector: .//p[3]
49
- item_link_selector: .//a/@href
49
+ item_link_selector: .//a
50
50
  item_time_selector: .//time
51
51
  item_title_selector: .//p[2]
52
52
  ```
53
53
 
54
+ ### `id`
55
+
56
+ Feed ID.
57
+
58
+ - required
59
+ - Used for feed URL.
60
+
54
61
  ### `title`
55
62
 
56
- Feed title, used for RSS `<title>` element in `<channel>` element.
63
+ Feed title.
64
+
65
+ - required
66
+ - Used for RSS `<title>` element in `<channel>` element.
57
67
 
58
68
  ### `url`
59
69
 
60
- URL to fetch HTML page for building feed.
70
+ HTML source URL.
71
+
72
+ - required
73
+ - Used to fetch HTML page for building feed.
61
74
 
62
75
  ### `item_selector`
63
76
 
64
- CSS or XPath selector to search each item, equivalent unit to RSS `<item>` element.
77
+ CSS or XPath selector to search each item.
65
78
 
66
- ### `item_link_selector`
79
+ - required
80
+ - Equivalent unit to RSS `<item>` element.
67
81
 
68
- CSS or XPath selector to find `<a>` element in each item, used for `<link>` in `<item>`.
82
+ ### `item_link_selector`
69
83
 
70
- ### `item_time_selector`
84
+ CSS or XPath selector to find `<a>` element in each item.
71
85
 
72
- CSS or XPath selector to find element with datetime information in each item, used for `<pubDate>` in `<item>`. Its `datetime` attribute or its inner HTML is used to calculate datetime.
86
+ - required
87
+ - Used for `<link>` in `<item>`.
73
88
 
74
89
  ### `item_title_selector`
75
90
 
76
- CSS or XPath selector to find element with title information in each item, used for `<pubDate>` in `<item>`.
91
+ CSS or XPath selector to find element with title information in each item.
92
+
93
+ - required
94
+ - Used for `<title>` in `<item>`.
77
95
 
78
96
  ### `item_description_selector`
79
97
 
80
- CSS or XPath selector to find element with description information in each item, used for `<description>` in `<item>`.
98
+ CSS or XPath selector to find element with description information in each item.
99
+
100
+ - optional
101
+ - Used for `<description>` in `<item>`.
102
+
103
+ ### `item_time_selector`
104
+
105
+ CSS or XPath selector to find element with datetime information in each item.
106
+
107
+ - optional
108
+ - Used for `<pubDate>` in `<item>`. Its `datetime` attribute or its inner HTML is used to calculate datetime.
81
109
 
82
110
  ## Usage
83
111
 
@@ -7,8 +7,11 @@ module Weneedfeed
7
7
  autoload :Capture, 'weneedfeed/capture'
8
8
  autoload :Command, 'weneedfeed/command'
9
9
  autoload :Controllers, 'weneedfeed/controllers'
10
+ autoload :Helpers, 'weneedfeed/helpers'
10
11
  autoload :Item, 'weneedfeed/item'
11
12
  autoload :Page, 'weneedfeed/page'
13
+ autoload :PageSchema, 'weneedfeed/page_schema'
14
+ autoload :Schema, 'weneedfeed/schema'
12
15
  autoload :Scraping, 'weneedfeed/scraping'
13
16
  autoload :Views, 'weneedfeed/views'
14
17
  end
@@ -6,12 +6,12 @@ module Weneedfeed
6
6
  class Application < ::Hibana::Application
7
7
  route do
8
8
  get '/', to: ::Weneedfeed::Controllers::ShowTopPage, as: :top_page
9
- get '/feeds/:page_name.xml', to: ::Weneedfeed::Controllers::ShowFeed, as: :feed
9
+ get '/feeds/:page_id.xml', to: ::Weneedfeed::Controllers::ShowFeed, as: :feed
10
10
  end
11
11
 
12
12
  # @param [Hash] schema
13
13
  def initialize(schema:)
14
- @schema = schema
14
+ @schema = ::Weneedfeed::Schema.new(schema)
15
15
  super()
16
16
  end
17
17
 
@@ -23,8 +23,8 @@ module Weneedfeed
23
23
 
24
24
  # @return [Array<String>]
25
25
  def paths
26
- ['/'] + @schema['pages'].keys.map do |page_name|
27
- "/feeds/#{page_name}.xml"
26
+ ['/'] + @schema.page_ids.map do |page_id|
27
+ "/feeds/#{page_id}.xml"
28
28
  end
29
29
  end
30
30
  end
@@ -2,6 +2,7 @@
2
2
 
3
3
  module Weneedfeed
4
4
  module Controllers
5
+ autoload :Base, 'weneedfeed/controllers/base'
5
6
  autoload :ShowFeed, 'weneedfeed/controllers/show_feed'
6
7
  autoload :ShowTopPage, 'weneedfeed/controllers/show_top_page'
7
8
  end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Weneedfeed
4
+ module Controllers
5
+ class Base < ::Hibana::Controller
6
+ include ::Weneedfeed::Helpers::Parameters
7
+
8
+ private
9
+
10
+ # @return [Weneedfeed::Schema]
11
+ def schema
12
+ request.env['weneedfeed.schema']
13
+ end
14
+ end
15
+ end
16
+ end
@@ -2,31 +2,22 @@
2
2
 
3
3
  module Weneedfeed
4
4
  module Controllers
5
- class ShowFeed < ::Hibana::Controller
5
+ class ShowFeed < ::Weneedfeed::Controllers::Base
6
6
  def call
7
- env = request.env
8
- page_name = env.dig(
9
- 'router.params',
10
- :page_name
11
- )
12
- properties = env.dig(
13
- 'weneedfeed.schema',
14
- 'pages',
15
- page_name
16
- )
17
- unless properties
7
+ page_schema = schema.find_page_schema(path_parameters[:page_id])
8
+ unless page_schema
18
9
  response.status = 404
19
10
  return
20
11
  end
21
12
 
22
13
  scraping = ::Weneedfeed::Scraping.new(
23
- item_description_selector: properties['item_description_selector'],
24
- item_link_selector: properties['item_link_selector'],
25
- item_time_selector: properties['item_time_selector'],
26
- item_title_selector: properties['item_title_selector'],
27
- item_selector: properties['item_selector'],
28
- title: properties['title'],
29
- url: properties['url']
14
+ item_description_selector: page_schema.item_description_selector,
15
+ item_link_selector: page_schema.item_link_selector,
16
+ item_time_selector: page_schema.item_time_selector,
17
+ item_title_selector: page_schema.item_title_selector,
18
+ item_selector: page_schema.item_selector,
19
+ title: page_schema.title,
20
+ url: page_schema.url,
30
21
  )
31
22
  page = scraping.call
32
23
 
@@ -2,18 +2,13 @@
2
2
 
3
3
  module Weneedfeed
4
4
  module Controllers
5
- class ShowTopPage < ::Hibana::Controller
5
+ class ShowTopPage < ::Weneedfeed::Controllers::Base
6
6
  def call
7
- pages = request.env.dig(
8
- 'weneedfeed.schema',
9
- 'pages'
10
- ).sort_by do |_key, value|
11
- value['title']
12
- end
7
+ page_schemata = schema.page_schemata.sort_by(&:title)
13
8
  response.content_type = 'text/html'
14
9
  response.write(
15
10
  ::Weneedfeed::Views::ShowTopPage.new(
16
- pages: pages,
11
+ page_schemata: page_schemata,
17
12
  partial_template_path: ::File.expand_path(
18
13
  'templates/show_top_page.html.erb',
19
14
  "#{__dir__}/../../.."
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Weneedfeed
4
+ module Helpers
5
+ autoload :Parameters, 'weneedfeed/helpers/parameters'
6
+ end
7
+ end
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Weneedfeed
4
+ module Helpers
5
+ module Parameters
6
+ private
7
+
8
+ # @return [Hash]
9
+ def path_parameters
10
+ request.env['router.params']
11
+ end
12
+ end
13
+ end
14
+ end
@@ -15,8 +15,8 @@ module Weneedfeed
15
15
  end
16
16
  end
17
17
 
18
- # @param [String] description_selector
19
- # @param [String] link_selector
18
+ # @param [String, nil] description_selector
19
+ # @param [String, nil] link_selector
20
20
  # @param [Nokogiri::Node] node
21
21
  # @param [String, nil] time_selector
22
22
  # @param [String] title_selector
@@ -39,6 +39,8 @@ module Weneedfeed
39
39
 
40
40
  # @return [String, nil]
41
41
  def description
42
+ return unless @description_selector
43
+
42
44
  @node.at(@description_selector)&.inner_html
43
45
  end
44
46
 
@@ -52,7 +54,12 @@ module Weneedfeed
52
54
 
53
55
  # @return [Time, nil]
54
56
  def time
55
- self.class.parse_time(time_string)
57
+ return unless @time_selector
58
+
59
+ string = time_string
60
+ return unless string
61
+
62
+ self.class.parse_time(string)
56
63
  end
57
64
 
58
65
  # @return [String]
@@ -67,7 +74,7 @@ module Weneedfeed
67
74
  @node.at(@time_selector)
68
75
  end
69
76
 
70
- # @return [String]
77
+ # @return [String, nil]
71
78
  def time_string
72
79
  node = time_node
73
80
  return unless node
@@ -8,9 +8,9 @@ module Weneedfeed
8
8
  # @return [String]
9
9
  attr_reader :url
10
10
 
11
- # @param [String] item_description_selector
11
+ # @param [String, nil] item_description_selector
12
12
  # @param [String] item_link_selector
13
- # @param [String] item_time_selector
13
+ # @param [String, nil] item_time_selector
14
14
  # @param [String] item_title_selector
15
15
  # @param [String] item_selector
16
16
  # @param [Nokogiri::Node] node
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Weneedfeed
4
+ PageSchema = Struct.new(
5
+ :id,
6
+ :item_description_selector,
7
+ :item_link_selector,
8
+ :item_time_selector,
9
+ :item_title_selector,
10
+ :item_selector,
11
+ :title,
12
+ :url,
13
+ keyword_init: true,
14
+ )
15
+ end
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Weneedfeed
4
+ # Wraps the raw schema Hash and exposes its `pages` entries as PageSchema objects.
5
+ class Schema
6
+ # @param [Hash] raw Schema data; `raw['pages']` is iterated to build page schemata.
7
+ def initialize(raw)
8
+ @raw = raw
9
+ end
10
+
11
+ # Finds the page schema whose id equals the given id.
12
+ #
13
+ # @param [String, nil] page_schema_id
14
+ # @return [Weneedfeed::PageSchema, nil] nil when no page has that id
15
+ def find_page_schema(page_schema_id)
16
+ page_schemata.find do |page_schema|
17
+ page_schema.id == page_schema_id
18
+ end
19
+ end
20
+
21
+ # @return [Array<String>]
22
+ def page_ids
23
+ page_schemata.map(&:id)
24
+ end
25
+
26
+ # Builds one PageSchema per entry of `raw['pages']`.
27
+ #
28
+ # @return [Array<Weneedfeed::PageSchema>]
29
+ def page_schemata
30
+ @raw['pages'].map do |hash|
31
+ ::Weneedfeed::PageSchema.new(
32
+ id: hash['id'],
33
+ # Must be passed explicitly: an omitted Struct member stays nil and the description is never scraped.
34
+ item_description_selector: hash['item_description_selector'],
35
+ item_link_selector: hash['item_link_selector'],
36
+ item_time_selector: hash['item_time_selector'],
37
+ item_title_selector: hash['item_title_selector'],
38
+ item_selector: hash['item_selector'],
39
+ title: hash['title'],
40
+ url: hash['url'],
41
+ )
42
+ end
43
+ end
44
+ end
45
+ end
@@ -5,9 +5,9 @@ require 'nokogiri'
5
5
 
6
6
  module Weneedfeed
7
7
  class Scraping
8
- # @param [String] item_description_selector
8
+ # @param [String, nil] item_description_selector
9
9
  # @param [String] item_link_selector
10
- # @param [String] item_time_selector
10
+ # @param [String, nil] item_time_selector
11
11
  # @param [String] item_title_selector
12
12
  # @param [String] item_selector
13
13
  # @param [String] title
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Weneedfeed
4
- VERSION = '0.5.0'
4
+ VERSION = '0.6.0'
5
5
  end
@@ -3,6 +3,8 @@
3
3
  module Weneedfeed
4
4
  module Views
5
5
  class ShowFeed < ::Hibana::View
6
+ include ::Weneedfeed::Helpers::Parameters
7
+
6
8
  # @param [Weneedfeed::Page] page
7
9
  def initialize(page:, **argv)
8
10
  super(**argv)
@@ -13,7 +15,7 @@ module Weneedfeed
13
15
 
14
16
  # @return [String]
15
17
  def top_page_path
16
- request.path.delete_suffix(router.path(:feed, page_name: page_name))
18
+ request.path.delete_suffix(router.path(:feed, page_id: page_id))
17
19
  end
18
20
 
19
21
  # @return [Enumerable<Weneedfeed::Item>]
@@ -24,8 +26,8 @@ module Weneedfeed
24
26
  end
25
27
 
26
28
  # @return [String]
27
- def page_name
28
- request.env['router.params'][:page_name]
29
+ def page_id
30
+ path_parameters[:page_id]
29
31
  end
30
32
 
31
33
  # @return [Hanami::Router]
@@ -3,10 +3,10 @@
3
3
  module Weneedfeed
4
4
  module Views
5
5
  class ShowTopPage < ::Hibana::View
6
- # @param [Array<Weneedfeed::Page>] pages
7
- def initialize(pages:, **argv)
6
+ # @param [Array<Weneedfeed::PageSchema>] page_schemata
7
+ def initialize(page_schemata:, **argv)
8
8
  super(**argv)
9
- @pages = pages
9
+ @page_schemata = page_schemata
10
10
  end
11
11
 
12
12
  private
@@ -16,10 +16,10 @@ module Weneedfeed
16
16
  request.path.delete_suffix(router.path(:top_page))
17
17
  end
18
18
 
19
- # @param [String] page_name
19
+ # @param [String] page_id
20
20
  # @return [String]
21
- def feed_path(page_name:)
22
- "#{base_path}#{router.path(:feed, page_name: page_name)}"
21
+ def feed_path(page_id:)
22
+ "#{base_path}#{router.path(:feed, page_id: page_id)}"
23
23
  end
24
24
 
25
25
  # @return [Hanami::Router]
@@ -12,7 +12,9 @@
12
12
  <item>
13
13
  <title><![CDATA[<%= item.title %>]]></title>
14
14
  <link><%= item.link %></link>
15
- <pubDate><%= item.time.rfc822 %></pubDate>
15
+ <% if item.time %>
16
+ <pubDate><%= item.time.rfc822 %></pubDate>
17
+ <% end %>
16
18
  <description><![CDATA[<%= item.description %>]]></description>
17
19
  <content:encoded><![CDATA[<%= item.description %>]]></content:encoded>
18
20
  <guid isPermaLink="true"><%= item.link %></guid>
@@ -7,9 +7,9 @@
7
7
  </head>
8
8
  <body>
9
9
  <ul>
10
- <% @pages.each do |page_name, hash| %>
10
+ <% @page_schemata.each do |page_schema| %>
11
11
  <li>
12
- <a href="<%= feed_path(page_name: page_name) %>"><%= hash['title'] %></a>
12
+ <a href="<%= feed_path(page_id: page_schema.id) %>"><%= page_schema.title %></a>
13
13
  </li>
14
14
  <% end %>
15
15
  </ul>
@@ -26,6 +26,7 @@ Gem::Specification.new do |spec|
26
26
  spec.require_paths = ['lib']
27
27
 
28
28
  spec.add_runtime_dependency 'faraday'
29
+ spec.add_runtime_dependency 'hanami-router', '2.0.0.alpha3'
29
30
  spec.add_runtime_dependency 'hibana'
30
31
  spec.add_runtime_dependency 'nokogiri'
31
32
  spec.add_runtime_dependency 'rack-capture', '>= 0.4.0'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: weneedfeed
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ryo Nakamura
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-11-14 00:00:00.000000000 Z
11
+ date: 2020-11-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: faraday
@@ -24,6 +24,20 @@ dependencies:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: hanami-router
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '='
32
+ - !ruby/object:Gem::Version
33
+ version: 2.0.0.alpha3
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '='
39
+ - !ruby/object:Gem::Version
40
+ version: 2.0.0.alpha3
27
41
  - !ruby/object:Gem::Dependency
28
42
  name: hibana
29
43
  requirement: !ruby/object:Gem::Requirement
@@ -106,10 +120,15 @@ files:
106
120
  - lib/weneedfeed/capture.rb
107
121
  - lib/weneedfeed/command.rb
108
122
  - lib/weneedfeed/controllers.rb
123
+ - lib/weneedfeed/controllers/base.rb
109
124
  - lib/weneedfeed/controllers/show_feed.rb
110
125
  - lib/weneedfeed/controllers/show_top_page.rb
126
+ - lib/weneedfeed/helpers.rb
127
+ - lib/weneedfeed/helpers/parameters.rb
111
128
  - lib/weneedfeed/item.rb
112
129
  - lib/weneedfeed/page.rb
130
+ - lib/weneedfeed/page_schema.rb
131
+ - lib/weneedfeed/schema.rb
113
132
  - lib/weneedfeed/scraping.rb
114
133
  - lib/weneedfeed/version.rb
115
134
  - lib/weneedfeed/views.rb