weneedfeed 0.5.0 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0fdaa4a0f724b7018d6b1b2b585aeaf970eff37538df7ca7aaf760fc217b56dc
4
- data.tar.gz: 9f34a11d59aaada2d5afd689263f722c0143af5e59b9741e7d6f34d16d807c57
3
+ metadata.gz: 65814d68cfaf1936de388ce721e3aee49a0e4641c0ecd6ae2afb72c858fbd8f4
4
+ data.tar.gz: ed124803cfb9589dd3b8e44169454b07623a2460a7ac2412c1946d08e51d590f
5
5
  SHA512:
6
- metadata.gz: 878189a82437b83ed25d0c25c5a6de0f4b840c1daf117ccd83d31be1ecbbe2db0d97289bd5ab93e327508562843ddea253948cad63324d3557ab28a0d5052268
7
- data.tar.gz: 8c56392e937c331456b44c869a96316c892b5321fd85611325c91c3987de079d101144998a4399f4fa668c46181e3f0c51f79fe157bf333d211fcf5ae0aeec87
6
+ metadata.gz: 486e7af49032ebf8124d6360cd835ec5ede778c4dc6911259fcee753247166ef6c53e3468bb3c386a3e2d794b3939685795c230a4ff24e65a67e615c5b39a800
7
+ data.tar.gz: 636f09f3860a1a7a16bf425facd29988e7037ae35a9ab8a11b8a37118339c9bdf750336eb01b8c36e2bee0d8b81ed2b1a9846269096428b702c09b27b873617a
@@ -9,3 +9,6 @@ Metrics:
9
9
 
10
10
  Style/Documentation:
11
11
  Enabled: false
12
+
13
+ Style/TrailingCommaInArguments:
14
+ Enabled: false
@@ -7,6 +7,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## Unreleased
9
9
 
10
+ ## 0.6.0 - 2020-11-15
11
+
12
+ ### Changed
13
+
14
+ - Change pages schema from Hash to Array.
15
+ - Require hanami-router 2.0.0.alpha3 (pinned to exactly this version).
16
+ - Ignore pubDate when item.time is not found.
17
+ - Make item_description_selector and item_time_selector optional.
18
+
10
19
  ## 0.5.0 - 2020-11-14
11
20
 
12
21
  ### Added
@@ -1,8 +1,9 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- weneedfeed (0.5.0)
4
+ weneedfeed (0.6.0)
5
5
  faraday
6
+ hanami-router (= 2.0.0.alpha3)
6
7
  hibana
7
8
  nokogiri
8
9
  rack-capture (>= 0.4.0)
@@ -14,29 +15,28 @@ GEM
14
15
  addressable (2.7.0)
15
16
  public_suffix (>= 2.0.2, < 5.0)
16
17
  ast (2.4.1)
17
- concurrent-ruby (1.1.7)
18
18
  crack (0.4.4)
19
19
  diff-lcs (1.4.4)
20
20
  faraday (1.1.0)
21
21
  multipart-post (>= 1.2, < 3)
22
22
  ruby2_keywords
23
- hanami-router (1.3.2)
24
- hanami-utils (~> 1.3)
25
- http_router (= 0.11.2)
23
+ hanami-router (2.0.0.alpha3)
24
+ mustermann (~> 1.0)
25
+ mustermann-contrib (~> 1.0)
26
26
  rack (~> 2.0)
27
- hanami-utils (1.3.6)
28
- concurrent-ruby (~> 1.0)
29
- transproc (~> 1.0)
27
+ hansi (0.2.0)
30
28
  hashdiff (1.0.1)
31
29
  hibana (0.1.1)
32
30
  hanami-router
33
31
  rack
34
32
  tilt
35
- http_router (0.11.2)
36
- rack (>= 1.0.0)
37
- url_mount (~> 0.2.1)
38
33
  mini_portile2 (2.4.0)
39
34
  multipart-post (2.1.1)
35
+ mustermann (1.1.1)
36
+ ruby2_keywords (~> 0.0.1)
37
+ mustermann-contrib (1.1.1)
38
+ hansi (~> 0.2.0)
39
+ mustermann (= 1.1.1)
40
40
  nokogiri (1.10.10)
41
41
  mini_portile2 (~> 2.4.0)
42
42
  parallel (1.19.2)
@@ -80,10 +80,7 @@ GEM
80
80
  ruby2_keywords (0.0.2)
81
81
  thor (1.0.1)
82
82
  tilt (2.0.10)
83
- transproc (1.1.1)
84
83
  unicode-display_width (1.7.0)
85
- url_mount (0.2.1)
86
- rack
87
84
  webmock (3.9.3)
88
85
  addressable (>= 2.3.6)
89
86
  crack (>= 0.3.2)
data/README.md CHANGED
@@ -33,51 +33,79 @@ You need to write a schema file named with `weneedfeed.yml` to use this gem.
33
33
 
34
34
  ```yaml
35
35
  pages:
36
- example1:
37
- title: example site 1
36
+ - id: example1
37
+ title: Example feed with CSS Selector
38
38
  url: http://example.com/1
39
39
  item_selector: li
40
40
  item_description_selector: p:nth-child(3)
41
41
  item_link_selector: a
42
42
  item_time_selector: time
43
43
  item_title_selector: p:nth-child(2)
44
- example2:
45
- title: example site 2
44
+ - id: example2
45
+ title: Example feed with XPath
46
46
  url: http://example.com/2
47
47
  item_selector: //li
48
48
  item_description_selector: .//p[3]
49
- item_link_selector: .//a/@href
49
+ item_link_selector: .//a
50
50
  item_time_selector: .//time
51
51
  item_title_selector: .//p[2]
52
52
  ```
53
53
 
54
+ ### `id`
55
+
56
+ Feed ID.
57
+
58
+ - required
59
+ - Used for feed URL.
60
+
54
61
  ### `title`
55
62
 
56
- Feed title, used for RSS `<title>` element in `<channel>` element.
63
+ Feed title.
64
+
65
+ - required
66
+ - Used for RSS `<title>` element in `<channel>` element.
57
67
 
58
68
  ### `url`
59
69
 
60
- URL to fetch HTML page for building feed.
70
+ HTML source URL.
71
+
72
+ - required
73
+ - Used to fetch HTML page for building feed.
61
74
 
62
75
  ### `item_selector`
63
76
 
64
- CSS or XPath selector to search each item, equivalent unit to RSS `<item>` element.
77
+ CSS or XPath selector to search each item.
65
78
 
66
- ### `item_link_selector`
79
+ - required
80
+ - Equivalent unit to RSS `<item>` element.
67
81
 
68
- CSS or XPath selector to find `<a>` element in each item, used for `<link>` in `<item>`.
82
+ ### `item_link_selector`
69
83
 
70
- ### `item_time_selector`
84
+ CSS or XPath selector to find `<a>` element in each item.
71
85
 
72
- CSS or XPath selector to find element with datetime information in each item, used for `<pubDate>` in `<item>`. Its `datetime` attribute or its inner HTML is used to calculate datetime.
86
+ - required
87
+ - Used for `<link>` in `<item>`.
73
88
 
74
89
  ### `item_title_selector`
75
90
 
76
- CSS or XPath selector to find element with title information in each item, used for `<pubDate>` in `<item>`.
91
+ CSS or XPath selector to find element with title information in each item.
92
+
93
+ - required
94
+ - Used for `<title>` in `<item>`.
77
95
 
78
96
  ### `item_description_selector`
79
97
 
80
- CSS or XPath selector to find element with description information in each item, used for `<description>` in `<item>`.
98
+ CSS or XPath selector to find element with description information in each item.
99
+
100
+ - optional
101
+ - Used for `<description>` in `<item>`.
102
+
103
+ ### `item_time_selector`
104
+
105
+ CSS or XPath selector to find element with datetime information in each item.
106
+
107
+ - optional
108
+ - Used for `<pubDate>` in `<item>`. Its `datetime` attribute or its inner HTML is used to calculate datetime.
81
109
 
82
110
  ## Usage
83
111
 
@@ -7,8 +7,11 @@ module Weneedfeed
7
7
  autoload :Capture, 'weneedfeed/capture'
8
8
  autoload :Command, 'weneedfeed/command'
9
9
  autoload :Controllers, 'weneedfeed/controllers'
10
+ autoload :Helpers, 'weneedfeed/helpers'
10
11
  autoload :Item, 'weneedfeed/item'
11
12
  autoload :Page, 'weneedfeed/page'
13
+ autoload :PageSchema, 'weneedfeed/page_schema'
14
+ autoload :Schema, 'weneedfeed/schema'
12
15
  autoload :Scraping, 'weneedfeed/scraping'
13
16
  autoload :Views, 'weneedfeed/views'
14
17
  end
@@ -6,12 +6,12 @@ module Weneedfeed
6
6
  class Application < ::Hibana::Application
7
7
  route do
8
8
  get '/', to: ::Weneedfeed::Controllers::ShowTopPage, as: :top_page
9
- get '/feeds/:page_name.xml', to: ::Weneedfeed::Controllers::ShowFeed, as: :feed
9
+ get '/feeds/:page_id.xml', to: ::Weneedfeed::Controllers::ShowFeed, as: :feed
10
10
  end
11
11
 
12
12
  # @param [Hash] schema
13
13
  def initialize(schema:)
14
- @schema = schema
14
+ @schema = ::Weneedfeed::Schema.new(schema)
15
15
  super()
16
16
  end
17
17
 
@@ -23,8 +23,8 @@ module Weneedfeed
23
23
 
24
24
  # @return [Array<String>]
25
25
  def paths
26
- ['/'] + @schema['pages'].keys.map do |page_name|
27
- "/feeds/#{page_name}.xml"
26
+ ['/'] + @schema.page_ids.map do |page_id|
27
+ "/feeds/#{page_id}.xml"
28
28
  end
29
29
  end
30
30
  end
@@ -2,6 +2,7 @@
2
2
 
3
3
  module Weneedfeed
4
4
  module Controllers
5
+ autoload :Base, 'weneedfeed/controllers/base'
5
6
  autoload :ShowFeed, 'weneedfeed/controllers/show_feed'
6
7
  autoload :ShowTopPage, 'weneedfeed/controllers/show_top_page'
7
8
  end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Weneedfeed
4
+ module Controllers
5
+ class Base < ::Hibana::Controller
6
+ include ::Weneedfeed::Helpers::Parameters
7
+
8
+ private
9
+
10
+ # @return [Weneedfeed::Schema]
11
+ def schema
12
+ request.env['weneedfeed.schema']
13
+ end
14
+ end
15
+ end
16
+ end
@@ -2,31 +2,22 @@
2
2
 
3
3
  module Weneedfeed
4
4
  module Controllers
5
- class ShowFeed < ::Hibana::Controller
5
+ class ShowFeed < ::Weneedfeed::Controllers::Base
6
6
  def call
7
- env = request.env
8
- page_name = env.dig(
9
- 'router.params',
10
- :page_name
11
- )
12
- properties = env.dig(
13
- 'weneedfeed.schema',
14
- 'pages',
15
- page_name
16
- )
17
- unless properties
7
+ page_schema = schema.find_page_schema(path_parameters[:page_id])
8
+ unless page_schema
18
9
  response.status = 404
19
10
  return
20
11
  end
21
12
 
22
13
  scraping = ::Weneedfeed::Scraping.new(
23
- item_description_selector: properties['item_description_selector'],
24
- item_link_selector: properties['item_link_selector'],
25
- item_time_selector: properties['item_time_selector'],
26
- item_title_selector: properties['item_title_selector'],
27
- item_selector: properties['item_selector'],
28
- title: properties['title'],
29
- url: properties['url']
14
+ item_description_selector: page_schema.item_description_selector,
15
+ item_link_selector: page_schema.item_link_selector,
16
+ item_time_selector: page_schema.item_time_selector,
17
+ item_title_selector: page_schema.item_title_selector,
18
+ item_selector: page_schema.item_selector,
19
+ title: page_schema.title,
20
+ url: page_schema.url,
30
21
  )
31
22
  page = scraping.call
32
23
 
@@ -2,18 +2,13 @@
2
2
 
3
3
  module Weneedfeed
4
4
  module Controllers
5
- class ShowTopPage < ::Hibana::Controller
5
+ class ShowTopPage < ::Weneedfeed::Controllers::Base
6
6
  def call
7
- pages = request.env.dig(
8
- 'weneedfeed.schema',
9
- 'pages'
10
- ).sort_by do |_key, value|
11
- value['title']
12
- end
7
+ page_schemata = schema.page_schemata.sort_by(&:title)
13
8
  response.content_type = 'text/html'
14
9
  response.write(
15
10
  ::Weneedfeed::Views::ShowTopPage.new(
16
- pages: pages,
11
+ page_schemata: page_schemata,
17
12
  partial_template_path: ::File.expand_path(
18
13
  'templates/show_top_page.html.erb',
19
14
  "#{__dir__}/../../.."
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Weneedfeed
4
+ module Helpers
5
+ autoload :Parameters, 'weneedfeed/helpers/parameters'
6
+ end
7
+ end
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Weneedfeed
4
+ module Helpers
5
+ module Parameters
6
+ private
7
+
8
+ # @return [Hash]
9
+ def path_parameters
10
+ request.env['router.params']
11
+ end
12
+ end
13
+ end
14
+ end
@@ -15,8 +15,8 @@ module Weneedfeed
15
15
  end
16
16
  end
17
17
 
18
- # @param [String] description_selector
19
- # @param [String] link_selector
18
+ # @param [String, nil] description_selector
19
+ # @param [String] link_selector
20
20
  # @param [Nokogiri::Node] node
21
21
  # @param [String, nil] time_selector
22
22
  # @param [String] title_selector
@@ -39,6 +39,8 @@ module Weneedfeed
39
39
 
40
40
  # @return [String, nil]
41
41
  def description
42
+ return unless @description_selector
43
+
42
44
  @node.at(@description_selector)&.inner_html
43
45
  end
44
46
 
@@ -52,7 +54,12 @@ module Weneedfeed
52
54
 
53
55
  # @return [Time, nil]
54
56
  def time
55
- self.class.parse_time(time_string)
57
+ return unless @time_selector
58
+
59
+ string = time_string
60
+ return unless string
61
+
62
+ self.class.parse_time(string)
56
63
  end
57
64
 
58
65
  # @return [String]
@@ -67,7 +74,7 @@ module Weneedfeed
67
74
  @node.at(@time_selector)
68
75
  end
69
76
 
70
- # @return [String]
77
+ # @return [String, nil]
71
78
  def time_string
72
79
  node = time_node
73
80
  return unless node
@@ -8,9 +8,9 @@ module Weneedfeed
8
8
  # @return [String]
9
9
  attr_reader :url
10
10
 
11
- # @param [String] item_description_selector
11
+ # @param [String, nil] item_description_selector
12
12
  # @param [String] item_link_selector
13
- # @param [String] item_time_selector
13
+ # @param [String, nil] item_time_selector
14
14
  # @param [String] item_title_selector
15
15
  # @param [String] item_selector
16
16
  # @param [Nokogiri::Node] node
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Weneedfeed
4
+ PageSchema = Struct.new(
5
+ :id,
6
+ :item_description_selector,
7
+ :item_link_selector,
8
+ :item_time_selector,
9
+ :item_title_selector,
10
+ :item_selector,
11
+ :title,
12
+ :url,
13
+ keyword_init: true,
14
+ )
15
+ end
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Weneedfeed
4
+ # Wraps the raw schema Hash and exposes its 'pages' entries as PageSchema objects.
5
+ class Schema
6
+ # @param [Hash] raw Schema Hash; its 'pages' value is mapped as a list of Hashes.
7
+ def initialize(raw)
8
+ @raw = raw
9
+ end
10
+
11
+ # @param [String, nil] page_schema_id
12
+ # @return [Weneedfeed::PageSchema, nil] First page schema whose id matches, or nil.
13
+ def find_page_schema(page_schema_id)
14
+ page_schemata.find do |page_schema|
15
+ page_schema.id == page_schema_id
16
+ end
17
+ end
18
+
19
+ # @return [Array<String>]
20
+ def page_ids
21
+ page_schemata.map(&:id)
22
+ end
23
+
24
+ # @return [Array<Weneedfeed::PageSchema>]
25
+ def page_schemata
26
+ @raw['pages'].map do |hash|
27
+ ::Weneedfeed::PageSchema.new(
28
+ id: hash['id'],
29
+ # Optional key (nil when absent); forwarded so a configured description selector is not dropped.
30
+ item_description_selector: hash['item_description_selector'],
31
+ item_link_selector: hash['item_link_selector'],
32
+ item_time_selector: hash['item_time_selector'],
33
+ item_title_selector: hash['item_title_selector'],
34
+ item_selector: hash['item_selector'],
35
+ title: hash['title'],
36
+ url: hash['url'],
37
+ )
38
+ end
39
+ end
40
+ end
41
+ end
@@ -5,9 +5,9 @@ require 'nokogiri'
5
5
 
6
6
  module Weneedfeed
7
7
  class Scraping
8
- # @param [String] item_description_selector
8
+ # @param [String, nil] item_description_selector
9
9
  # @param [String] item_link_selector
10
- # @param [String] item_time_selector
10
+ # @param [String, nil] item_time_selector
11
11
  # @param [String] item_title_selector
12
12
  # @param [String] item_selector
13
13
  # @param [String] title
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Weneedfeed
4
- VERSION = '0.5.0'
4
+ VERSION = '0.6.0'
5
5
  end
@@ -3,6 +3,8 @@
3
3
  module Weneedfeed
4
4
  module Views
5
5
  class ShowFeed < ::Hibana::View
6
+ include ::Weneedfeed::Helpers::Parameters
7
+
6
8
  # @param [Weneedfeed::Page] page
7
9
  def initialize(page:, **argv)
8
10
  super(**argv)
@@ -13,7 +15,7 @@ module Weneedfeed
13
15
 
14
16
  # @return [String]
15
17
  def top_page_path
16
- request.path.delete_suffix(router.path(:feed, page_name: page_name))
18
+ request.path.delete_suffix(router.path(:feed, page_id: page_id))
17
19
  end
18
20
 
19
21
  # @return [Enumerable<Weneedfeed::Item>]
@@ -24,8 +26,8 @@ module Weneedfeed
24
26
  end
25
27
 
26
28
  # @return [String]
27
- def page_name
28
- request.env['router.params'][:page_name]
29
+ def page_id
30
+ path_parameters[:page_id]
29
31
  end
30
32
 
31
33
  # @return [Hanami::Router]
@@ -3,10 +3,10 @@
3
3
  module Weneedfeed
4
4
  module Views
5
5
  class ShowTopPage < ::Hibana::View
6
- # @param [Array<Weneedfeed::Page>] pages
7
- def initialize(pages:, **argv)
6
+ # @param [Array<Weneedfeed::PageSchema>] page_schemata
7
+ def initialize(page_schemata:, **argv)
8
8
  super(**argv)
9
- @pages = pages
9
+ @page_schemata = page_schemata
10
10
  end
11
11
 
12
12
  private
@@ -16,10 +16,10 @@ module Weneedfeed
16
16
  request.path.delete_suffix(router.path(:top_page))
17
17
  end
18
18
 
19
- # @param [String] page_name
19
+ # @param [String] page_id
20
20
  # @return [String]
21
- def feed_path(page_name:)
22
- "#{base_path}#{router.path(:feed, page_name: page_name)}"
21
+ def feed_path(page_id:)
22
+ "#{base_path}#{router.path(:feed, page_id: page_id)}"
23
23
  end
24
24
 
25
25
  # @return [Hanami::Router]
@@ -12,7 +12,9 @@
12
12
  <item>
13
13
  <title><![CDATA[<%= item.title %>]]></title>
14
14
  <link><%= item.link %></link>
15
- <pubDate><%= item.time.rfc822 %></pubDate>
15
+ <% if item.time %>
16
+ <pubDate><%= item.time.rfc822 %></pubDate>
17
+ <% end %>
16
18
  <description><![CDATA[<%= item.description %>]]></description>
17
19
  <content:encoded><![CDATA[<%= item.description %>]]></content:encoded>
18
20
  <guid isPermaLink="true"><%= item.link %></guid>
@@ -7,9 +7,9 @@
7
7
  </head>
8
8
  <body>
9
9
  <ul>
10
- <% @pages.each do |page_name, hash| %>
10
+ <% @page_schemata.each do |page_schema| %>
11
11
  <li>
12
- <a href="<%= feed_path(page_name: page_name) %>"><%= hash['title'] %></a>
12
+ <a href="<%= feed_path(page_id: page_schema.id) %>"><%= page_schema.title %></a>
13
13
  </li>
14
14
  <% end %>
15
15
  </ul>
@@ -26,6 +26,7 @@ Gem::Specification.new do |spec|
26
26
  spec.require_paths = ['lib']
27
27
 
28
28
  spec.add_runtime_dependency 'faraday'
29
+ spec.add_runtime_dependency 'hanami-router', '2.0.0.alpha3'
29
30
  spec.add_runtime_dependency 'hibana'
30
31
  spec.add_runtime_dependency 'nokogiri'
31
32
  spec.add_runtime_dependency 'rack-capture', '>= 0.4.0'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: weneedfeed
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ryo Nakamura
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-11-14 00:00:00.000000000 Z
11
+ date: 2020-11-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: faraday
@@ -24,6 +24,20 @@ dependencies:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: hanami-router
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '='
32
+ - !ruby/object:Gem::Version
33
+ version: 2.0.0.alpha3
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '='
39
+ - !ruby/object:Gem::Version
40
+ version: 2.0.0.alpha3
27
41
  - !ruby/object:Gem::Dependency
28
42
  name: hibana
29
43
  requirement: !ruby/object:Gem::Requirement
@@ -106,10 +120,15 @@ files:
106
120
  - lib/weneedfeed/capture.rb
107
121
  - lib/weneedfeed/command.rb
108
122
  - lib/weneedfeed/controllers.rb
123
+ - lib/weneedfeed/controllers/base.rb
109
124
  - lib/weneedfeed/controllers/show_feed.rb
110
125
  - lib/weneedfeed/controllers/show_top_page.rb
126
+ - lib/weneedfeed/helpers.rb
127
+ - lib/weneedfeed/helpers/parameters.rb
111
128
  - lib/weneedfeed/item.rb
112
129
  - lib/weneedfeed/page.rb
130
+ - lib/weneedfeed/page_schema.rb
131
+ - lib/weneedfeed/schema.rb
113
132
  - lib/weneedfeed/scraping.rb
114
133
  - lib/weneedfeed/version.rb
115
134
  - lib/weneedfeed/views.rb