weneedfeed 0.3.0 → 0.6.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b201152f3a5b05f2c336caf0ba02416eea09da02834a46818dc00c86596ee0d3
4
- data.tar.gz: 0fc63fe70919ac5284297c309545d0d1ea3a3253166d36f434bb6b598aa92721
3
+ metadata.gz: c580b642b78a2c26bf06d840fad6a745d498630779c670784deba389d9b28f1a
4
+ data.tar.gz: e29a86399e141b1c650a064cf6c45066149d93dee722b70fa657366519015f7b
5
5
  SHA512:
6
- metadata.gz: a318105d868fc4ca2b91a03541feffde8bb1af257b4c3126716c2988f21703f7ee87699183ab0d8f3ab67d111008af9485ee4e219d464ef618e459b5a8b8fc1e
7
- data.tar.gz: 726e20a19227146a81a309b32837396753a1fe572fe1ec6fd8ce44767dab61ebc337ccaa5af53d8d1449ba987bcefd76d6486db3988f4c95bd8d316f2440fd57
6
+ metadata.gz: 3293b7dbba14a24988a77bb2070adfef0a4c4f343197d5633076c497747d6f66d9642b89e37ec11f9d2c51a0f91b18c8dfd614cf053339adfc3c7b4390702774
7
+ data.tar.gz: f9549a610dec4b77e9e6f86829890bc31928f086652480cbe2c501b1c9fda479a1a0d549ec6efb3a40d1f8ea4fcdb4cc4ccc198403c57ab0a2dbf3c9010ea27f
@@ -5,16 +5,18 @@ on:
5
5
  push:
6
6
  branches:
7
7
  - master
8
+
8
9
  jobs:
9
10
  build:
10
11
  runs-on: ubuntu-18.04
11
12
  steps:
12
13
  - uses: actions/checkout@v2
13
- - uses: actions/setup-ruby@v1
14
+ - uses: ruby/setup-ruby@v1
14
15
  with:
15
- ruby-version: '2.7.2'
16
- - run: bundle install --jobs=$(($(nproc) - 1)) --retry=3
17
- - run: bundle exec rubocop --color --parallel
16
+ bundler-cache: true
17
+ ruby-version: 2.7.2
18
+ - uses: r7kamura/rubocop-problem-matchers-action@v1
19
+ - run: bundle exec rubocop --parallel
18
20
  - run: bundle exec rspec --force-color
19
21
 
20
22
 
data/.rspec CHANGED
@@ -1 +1,2 @@
1
+ --format doc
1
2
  --require spec_helper
@@ -9,3 +9,6 @@ Metrics:
9
9
 
10
10
  Style/Documentation:
11
11
  Enabled: false
12
+
13
+ Style/TrailingCommaInArguments:
14
+ Enabled: false
@@ -7,6 +7,61 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## Unreleased
9
9
 
10
+ ## 0.6.1 - 2020-11-15
11
+
12
+ ### Changed
13
+
14
+ - Require hibana 0.2 or later.
15
+
16
+ ### Fixed
17
+
18
+ - Fix installation error caused by hanami-router version restriction.
19
+
20
+ ### Removed
21
+
22
+ - Remove direct dependency on hanami-router.
23
+
24
+ ## 0.6.0 - 2020-11-15
25
+
26
+ ### Changed
27
+
28
+ - Change pages schema from Hash to Array.
29
+ - Require hanami-router 2.0.0.alpha3 or later versions.
30
+ - Ignore pubDate when item.time is not found.
31
+ - Make item_description_selector and item_time_selector optional.
32
+
33
+ ## 0.5.0 - 2020-11-14
34
+
35
+ ### Added
36
+
37
+ - Add datetime attribute support on time element.
38
+
39
+ ### Changed
40
+
41
+ - Change schema format about selectors.
42
+
43
+ ## 0.4.1 - 2020-11-08
44
+
45
+ ### Fixed
46
+
47
+ - Fix error when item description is not found.
48
+
49
+ ## 0.4.0 - 2020-11-08
50
+
51
+ ### Added
52
+
53
+ - Add CSS selector support.
54
+
55
+ ### Changed
56
+
57
+ - Change schema key from `xpath` to `selectors`.
58
+ - Change item_link_selector target from href attribute to a element.
59
+ - Change some argument names from `_xpath` to `_selector`.
60
+ - Change `weneedfeed build` description.
61
+ - Change `weneedfeed server` description.
62
+ - Change channel link from feed URL to top page URL.
63
+ - Sort top page feeds by their titles.
64
+
10
65
  ## 0.3.0 - 2020-11-08
11
66
 
12
67
  ### Added
@@ -1,9 +1,9 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- weneedfeed (0.3.0)
4
+ weneedfeed (0.6.1)
5
5
  faraday
6
- hibana
6
+ hibana (>= 0.2)
7
7
  nokogiri
8
8
  rack-capture (>= 0.4.0)
9
9
  thor
@@ -14,29 +14,28 @@ GEM
14
14
  addressable (2.7.0)
15
15
  public_suffix (>= 2.0.2, < 5.0)
16
16
  ast (2.4.1)
17
- concurrent-ruby (1.1.7)
18
17
  crack (0.4.4)
19
18
  diff-lcs (1.4.4)
20
19
  faraday (1.1.0)
21
20
  multipart-post (>= 1.2, < 3)
22
21
  ruby2_keywords
23
- hanami-router (1.3.2)
24
- hanami-utils (~> 1.3)
25
- http_router (= 0.11.2)
22
+ hanami-router (2.0.0.alpha3)
23
+ mustermann (~> 1.0)
24
+ mustermann-contrib (~> 1.0)
26
25
  rack (~> 2.0)
27
- hanami-utils (1.3.6)
28
- concurrent-ruby (~> 1.0)
29
- transproc (~> 1.0)
26
+ hansi (0.2.0)
30
27
  hashdiff (1.0.1)
31
- hibana (0.1.1)
32
- hanami-router
28
+ hibana (0.2.0)
29
+ hanami-router (>= 2.0.0.alpha3)
33
30
  rack
34
31
  tilt
35
- http_router (0.11.2)
36
- rack (>= 1.0.0)
37
- url_mount (~> 0.2.1)
38
32
  mini_portile2 (2.4.0)
39
33
  multipart-post (2.1.1)
34
+ mustermann (1.1.1)
35
+ ruby2_keywords (~> 0.0.1)
36
+ mustermann-contrib (1.1.1)
37
+ hansi (~> 0.2.0)
38
+ mustermann (= 1.1.1)
40
39
  nokogiri (1.10.10)
41
40
  mini_portile2 (~> 2.4.0)
42
41
  parallel (1.19.2)
@@ -80,10 +79,7 @@ GEM
80
79
  ruby2_keywords (0.0.2)
81
80
  thor (1.0.1)
82
81
  tilt (2.0.10)
83
- transproc (1.1.1)
84
82
  unicode-display_width (1.7.0)
85
- url_mount (0.2.1)
86
- rack
87
83
  webmock (3.9.3)
88
84
  addressable (>= 2.3.6)
89
85
  crack (>= 0.3.2)
data/README.md CHANGED
@@ -3,7 +3,7 @@
3
3
  [![](https://badge.fury.io/rb/weneedfeed.svg)](https://rubygems.org/gems/weneedfeed)
4
4
  [![](https://github.com/r7kamura/weneedfeed/workflows/test/badge.svg)](https://github.com/r7kamura/weneedfeed/actions?query=workflow%3Atest)
5
5
 
6
- Generate feeds from URL and XPath.
6
+ Generate feeds from URL and selectors.
7
7
 
8
8
  ## Installation
9
9
 
@@ -25,38 +25,124 @@ Or install it yourself as:
25
25
  gem install weneedfeed
26
26
  ```
27
27
 
28
- ## Usage
28
+ ## Schema
29
+
30
+ You need to write a schema file (named `weneedfeed.yml` by default) to use this gem.
29
31
 
30
- Write schema:
32
+ ### Example
31
33
 
32
34
  ```yaml
33
35
  pages:
34
- example:
35
- title: example name
36
- url: http://example.com/
37
- xpath:
38
- item: //li
39
- item_description: .//p[3]
40
- item_link: .//a/@href
41
- item_time: .//time/@datetime
42
- item_title: .//p[2]
36
+ - id: example1
37
+ title: Example feed with CSS Selector
38
+ url: http://example.com/1
39
+ item_selector: li
40
+ item_description_selector: p:nth-child(3)
41
+ item_link_selector: a
42
+ item_time_selector: time
43
+ item_title_selector: p:nth-child(2)
44
+ - id: example2
45
+ title: Example feed with XPath
46
+ url: http://example.com/2
47
+ item_selector: //li
48
+ item_description_selector: .//p[3]
49
+ item_link_selector: .//a
50
+ item_time_selector: .//time
51
+ item_title_selector: .//p[2]
43
52
  ```
44
53
 
45
- And then call `Weneedfeed::Capture`:
54
+ ### `id`
46
55
 
47
- ```ruby
48
- require 'weneedfeed'
56
+ Feed ID.
57
+
58
+ - required
59
+ - Used for feed URL.
60
+
61
+ ### `title`
62
+
63
+ Feed title.
64
+
65
+ - required
66
+ - Used for RSS `<title>` element in `<channel>` element.
67
+
68
+ ### `url`
69
+
70
+ HTML source URL.
71
+
72
+ - required
73
+ - Used to fetch HTML page for building feed.
74
+
75
+ ### `item_selector`
76
+
77
+ CSS or XPath selector to search each item.
78
+
79
+ - required
80
+ - Equivalent unit to RSS `<item>` element.
81
+
82
+ ### `item_link_selector`
83
+
84
+ CSS or XPath selector to find `<a>` element in each item.
85
+
86
+ - required
87
+ - Used for `<link>` in `<item>`.
88
+
89
+ ### `item_title_selector`
90
+
91
+ CSS or XPath selector to find element with title information in each item.
92
+
93
+ - required
94
+ - Used for `<title>` in `<item>`.
95
+
96
+ ### `item_description_selector`
97
+
98
+ CSS or XPath selector to find element with description information in each item.
99
+
100
+ - optional
101
+ - Used for `<description>` in `<item>`.
102
+
103
+ ### `item_time_selector`
104
+
105
+ CSS or XPath selector to find element with datetime information in each item.
106
+
107
+ - optional
108
+ - Used for `<pubDate>` in `<item>`. Its `datetime` attribute or its inner HTML is used to calculate datetime.
109
+
110
+ ## Usage
111
+
112
+ ### Build
113
+
114
+ Run `weneedfeed build` to build static files.
115
+
116
+ ```
117
+ Usage:
118
+ weneedfeed build --base-url=BASE_URL
119
+
120
+ Options:
121
+ --base-url=BASE_URL # Base URL where to locate built files. (e.g. `"https://user.github.io/repo"`)
122
+ [--schema-path=SCHEMA_PATH] # Path to weneedfeed YAML schema file.
123
+ # Default: weneedfeed.yml
124
+
125
+ Build static files.
126
+ ```
127
+
128
+ ### Server
129
+
130
+ Run `weneedfeed server` to run HTTP server.
131
+
132
+ ```
133
+ Usage:
134
+ weneedfeed server
135
+
136
+ Options:
137
+ [--schema-path=SCHEMA_PATH] # Path to weneedfeed YAML schema file.
138
+ # Default: weneedfeed.yml
49
139
 
50
- Weneedfeed::Capture.call(
51
- base_url: 'https://user.github.io/repo',
52
- schema_path: 'schema.yml'
53
- )
140
+ Run HTTP server.
54
141
  ```
55
142
 
56
- These files will be generated:
143
+ ## GitHub Actions Integration
57
144
 
58
- - output/index.html
59
- - output/feeds/example.xml
145
+ Use [weneedfeed-action](https://github.com/r7kamura/weneedfeed-action) for invoking weneedfeed on GitHub Actions.
60
146
 
61
147
  ## Development
62
148
 
@@ -7,8 +7,11 @@ module Weneedfeed
7
7
  autoload :Capture, 'weneedfeed/capture'
8
8
  autoload :Command, 'weneedfeed/command'
9
9
  autoload :Controllers, 'weneedfeed/controllers'
10
+ autoload :Helpers, 'weneedfeed/helpers'
10
11
  autoload :Item, 'weneedfeed/item'
11
12
  autoload :Page, 'weneedfeed/page'
13
+ autoload :PageSchema, 'weneedfeed/page_schema'
14
+ autoload :Schema, 'weneedfeed/schema'
12
15
  autoload :Scraping, 'weneedfeed/scraping'
13
16
  autoload :Views, 'weneedfeed/views'
14
17
  end
@@ -6,12 +6,12 @@ module Weneedfeed
6
6
  class Application < ::Hibana::Application
7
7
  route do
8
8
  get '/', to: ::Weneedfeed::Controllers::ShowTopPage, as: :top_page
9
- get '/feeds/:page_name.xml', to: ::Weneedfeed::Controllers::ShowFeed, as: :feed
9
+ get '/feeds/:page_id.xml', to: ::Weneedfeed::Controllers::ShowFeed, as: :feed
10
10
  end
11
11
 
12
12
  # @param [Hash] schema
13
13
  def initialize(schema:)
14
- @schema = schema
14
+ @schema = ::Weneedfeed::Schema.new(schema)
15
15
  super()
16
16
  end
17
17
 
@@ -23,8 +23,8 @@ module Weneedfeed
23
23
 
24
24
  # @return [Array<String>]
25
25
  def paths
26
- ['/'] + @schema['pages'].keys.map do |page_name|
27
- "/feeds/#{page_name}.xml"
26
+ ['/'] + @schema.page_ids.map do |page_id|
27
+ "/feeds/#{page_id}.xml"
28
28
  end
29
29
  end
30
30
  end
@@ -16,7 +16,7 @@ module Weneedfeed
16
16
 
17
17
  desc(
18
18
  'build',
19
- 'Build static files for feeds.'
19
+ 'Build static files.'
20
20
  )
21
21
 
22
22
  method_option(
@@ -44,7 +44,7 @@ module Weneedfeed
44
44
 
45
45
  desc(
46
46
  'server',
47
- 'Run HTTP server'
47
+ 'Run HTTP server.'
48
48
  )
49
49
 
50
50
  method_option(
@@ -2,6 +2,7 @@
2
2
 
3
3
  module Weneedfeed
4
4
  module Controllers
5
+ autoload :Base, 'weneedfeed/controllers/base'
5
6
  autoload :ShowFeed, 'weneedfeed/controllers/show_feed'
6
7
  autoload :ShowTopPage, 'weneedfeed/controllers/show_top_page'
7
8
  end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Weneedfeed
4
+ module Controllers
5
+ class Base < ::Hibana::Controller
6
+ include ::Weneedfeed::Helpers::Parameters
7
+
8
+ private
9
+
10
+ # @return [Weneedfeed::Schema]
11
+ def schema
12
+ request.env['weneedfeed.schema']
13
+ end
14
+ end
15
+ end
16
+ end
@@ -2,31 +2,22 @@
2
2
 
3
3
  module Weneedfeed
4
4
  module Controllers
5
- class ShowFeed < ::Hibana::Controller
5
+ class ShowFeed < ::Weneedfeed::Controllers::Base
6
6
  def call
7
- env = request.env
8
- page_name = env.dig(
9
- 'router.params',
10
- :page_name
11
- )
12
- properties = env.dig(
13
- 'weneedfeed.schema',
14
- 'pages',
15
- page_name
16
- )
17
- unless properties
7
+ page_schema = schema.find_page_schema(path_parameters[:page_id])
8
+ unless page_schema
18
9
  response.status = 404
19
10
  return
20
11
  end
21
12
 
22
13
  scraping = ::Weneedfeed::Scraping.new(
23
- item_description_xpath: properties['xpath']['item_description'],
24
- item_link_xpath: properties['xpath']['item_link'],
25
- item_time_xpath: properties['xpath']['item_time'],
26
- item_title_xpath: properties['xpath']['item_title'],
27
- item_xpath: properties['xpath']['item'],
28
- title: properties['title'],
29
- url: properties['url']
14
+ item_description_selector: page_schema.item_description_selector,
15
+ item_link_selector: page_schema.item_link_selector,
16
+ item_time_selector: page_schema.item_time_selector,
17
+ item_title_selector: page_schema.item_title_selector,
18
+ item_selector: page_schema.item_selector,
19
+ title: page_schema.title,
20
+ url: page_schema.url,
30
21
  )
31
22
  page = scraping.call
32
23
 
@@ -2,16 +2,13 @@
2
2
 
3
3
  module Weneedfeed
4
4
  module Controllers
5
- class ShowTopPage < ::Hibana::Controller
5
+ class ShowTopPage < ::Weneedfeed::Controllers::Base
6
6
  def call
7
- pages = request.env.dig(
8
- 'weneedfeed.schema',
9
- 'pages'
10
- )
7
+ page_schemata = schema.page_schemata.sort_by(&:title)
11
8
  response.content_type = 'text/html'
12
9
  response.write(
13
10
  ::Weneedfeed::Views::ShowTopPage.new(
14
- pages: pages,
11
+ page_schemata: page_schemata,
15
12
  partial_template_path: ::File.expand_path(
16
13
  'templates/show_top_page.html.erb',
17
14
  "#{__dir__}/../../.."
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Weneedfeed
4
+ module Helpers
5
+ autoload :Parameters, 'weneedfeed/helpers/parameters'
6
+ end
7
+ end
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Weneedfeed
4
+ module Helpers
5
+ module Parameters
6
+ private
7
+
8
+ # @return [Hash]
9
+ def path_parameters
10
+ request.env['router.params']
11
+ end
12
+ end
13
+ end
14
+ end
@@ -15,56 +15,71 @@ module Weneedfeed
15
15
  end
16
16
  end
17
17
 
18
- # @param [String] description_xpath
19
- # @param [String] link_xpath
18
+ # @param [String, nil] description_selector
19
+ # @param [String] link_selector
20
20
  # @param [Nokogiri::Node] node
21
- # @param [String] time_xpath
22
- # @param [String] title_xpath
21
+ # @param [String, nil] time_selector
22
+ # @param [String] title_selector
23
23
  # @param [String] url
24
24
  def initialize(
25
- description_xpath:,
26
- link_xpath:,
25
+ description_selector:,
26
+ link_selector:,
27
27
  node:,
28
- time_xpath:,
29
- title_xpath:,
28
+ time_selector:,
29
+ title_selector:,
30
30
  url:
31
31
  )
32
- @description_xpath = description_xpath
33
- @link_xpath = link_xpath
32
+ @description_selector = description_selector
33
+ @link_selector = link_selector
34
34
  @node = node
35
- @time_xpath = time_xpath
36
- @title_xpath = title_xpath
35
+ @time_selector = time_selector
36
+ @title_selector = title_selector
37
37
  @url = url
38
38
  end
39
39
 
40
40
  # @return [String, nil]
41
41
  def description
42
- @node.xpath(@description_xpath).inner_html
42
+ return unless @description_selector
43
+
44
+ @node.at(@description_selector)&.inner_html
43
45
  end
44
46
 
45
47
  # @return [String]
46
48
  def link
47
49
  ::URI.join(
48
50
  @url,
49
- @node.xpath(@link_xpath).inner_html
51
+ @node.at(@link_selector)['href']
50
52
  ).to_s
51
53
  end
52
54
 
53
55
  # @return [Time, nil]
54
56
  def time
55
- self.class.parse_time(time_string)
57
+ return unless @time_selector
58
+
59
+ string = time_string
60
+ return unless string
61
+
62
+ self.class.parse_time(string)
56
63
  end
57
64
 
58
- # @return [String, nil]
65
+ # @return [String]
59
66
  def title
60
- @node.xpath(@title_xpath).inner_text
67
+ @node.at(@title_selector).inner_text
61
68
  end
62
69
 
63
70
  private
64
71
 
65
- # @return [String]
72
+ # @return [Nokogiri::Node, nil]
73
+ def time_node
74
+ @node.at(@time_selector)
75
+ end
76
+
77
+ # @return [String, nil]
66
78
  def time_string
67
- @node.xpath(@time_xpath).inner_html
79
+ node = time_node
80
+ return unless node
81
+
82
+ node['datetime'] || node.inner_html
68
83
  end
69
84
  end
70
85
  end
@@ -8,29 +8,29 @@ module Weneedfeed
8
8
  # @return [String]
9
9
  attr_reader :url
10
10
 
11
- # @param [String] item_description_xpath
12
- # @param [String] item_link_xpath
13
- # @param [String] item_time_xpath
14
- # @param [String] item_title_xpath
15
- # @param [String] item_xpath
11
+ # @param [String, nil] item_description_selector
12
+ # @param [String] item_link_selector
13
+ # @param [String, nil] item_time_selector
14
+ # @param [String] item_title_selector
15
+ # @param [String] item_selector
16
16
  # @param [Nokogiri::Node] node
17
17
  # @param [String] title
18
18
  # @param [String] url
19
19
  def initialize(
20
- item_description_xpath:,
21
- item_link_xpath:,
22
- item_time_xpath:,
23
- item_title_xpath:,
24
- item_xpath:,
20
+ item_description_selector:,
21
+ item_link_selector:,
22
+ item_time_selector:,
23
+ item_title_selector:,
24
+ item_selector:,
25
25
  node:,
26
26
  title:,
27
27
  url:
28
28
  )
29
- @item_description_xpath = item_description_xpath
30
- @item_link_xpath = item_link_xpath
31
- @item_time_xpath = item_time_xpath
32
- @item_title_xpath = item_title_xpath
33
- @item_xpath = item_xpath
29
+ @item_description_selector = item_description_selector
30
+ @item_link_selector = item_link_selector
31
+ @item_time_selector = item_time_selector
32
+ @item_title_selector = item_title_selector
33
+ @item_selector = item_selector
34
34
  @node = node
35
35
  @title = title
36
36
  @url = url
@@ -38,13 +38,13 @@ module Weneedfeed
38
38
 
39
39
  # @return [Array<Weneedfeed::Item>]
40
40
  def items
41
- @node.xpath(@item_xpath).map do |node|
41
+ @node.search(@item_selector).map do |node|
42
42
  ::Weneedfeed::Item.new(
43
- description_xpath: @item_description_xpath,
44
- link_xpath: @item_link_xpath,
43
+ description_selector: @item_description_selector,
44
+ link_selector: @item_link_selector,
45
45
  node: node,
46
- time_xpath: @item_time_xpath,
47
- title_xpath: @item_title_xpath,
46
+ time_selector: @item_time_selector,
47
+ title_selector: @item_title_selector,
48
48
  url: @url
49
49
  )
50
50
  end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Weneedfeed
4
+ PageSchema = Struct.new(
5
+ :id,
6
+ :item_description_selector,
7
+ :item_link_selector,
8
+ :item_time_selector,
9
+ :item_title_selector,
10
+ :item_selector,
11
+ :title,
12
+ :url,
13
+ keyword_init: true,
14
+ )
15
+ end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Weneedfeed
4
+ class Schema
5
+ # @param [Hash] raw
6
+ def initialize(raw)
7
+ @raw = raw
8
+ end
9
+
10
+ # @param [String, nl] page_schema_id
11
+ # @return [Weneedfeed::PageSchema]
12
+ def find_page_schema(page_schema_id)
13
+ page_schemata.find do |page_schema|
14
+ page_schema.id == page_schema_id
15
+ end
16
+ end
17
+
18
+ # @return [Array<String>]
19
+ def page_ids
20
+ page_schemata.map(&:id)
21
+ end
22
+
23
+ # @return [Array<Weneedfeed::PageSchema>]
24
+ def page_schemata
25
+ @raw['pages'].map do |hash|
26
+ ::Weneedfeed::PageSchema.new(
27
+ id: hash['id'],
28
+ item_link_selector: hash['item_link_selector'],
29
+ item_time_selector: hash['item_time_selector'],
30
+ item_title_selector: hash['item_title_selector'],
31
+ item_selector: hash['item_selector'],
32
+ title: hash['title'],
33
+ url: hash['url'],
34
+ )
35
+ end
36
+ end
37
+ end
38
+ end
@@ -5,27 +5,27 @@ require 'nokogiri'
5
5
 
6
6
  module Weneedfeed
7
7
  class Scraping
8
- # @param [String] item_description_xpath
9
- # @param [String] item_link_xpath
10
- # @param [String] item_time_xpath
11
- # @param [String] item_title_xpath
12
- # @param [String] item_xpath
8
+ # @param [String, nil] item_description_selector
9
+ # @param [String] item_link_selector
10
+ # @param [String, nil] item_time_selector
11
+ # @param [String] item_title_selector
12
+ # @param [String] item_selector
13
13
  # @param [String] title
14
14
  # @param [String] url
15
15
  def initialize(
16
- item_description_xpath:,
17
- item_link_xpath:,
18
- item_time_xpath:,
19
- item_title_xpath:,
20
- item_xpath:,
16
+ item_description_selector:,
17
+ item_link_selector:,
18
+ item_time_selector:,
19
+ item_title_selector:,
20
+ item_selector:,
21
21
  title:,
22
22
  url:
23
23
  )
24
- @item_description_xpath = item_description_xpath
25
- @item_link_xpath = item_link_xpath
26
- @item_time_xpath = item_time_xpath
27
- @item_title_xpath = item_title_xpath
28
- @item_xpath = item_xpath
24
+ @item_description_selector = item_description_selector
25
+ @item_link_selector = item_link_selector
26
+ @item_time_selector = item_time_selector
27
+ @item_title_selector = item_title_selector
28
+ @item_selector = item_selector
29
29
  @title = title
30
30
  @url = url
31
31
  end
@@ -34,11 +34,11 @@ module Weneedfeed
34
34
  def call
35
35
  ::Weneedfeed::Page.new(
36
36
  node: parsed_body,
37
- item_description_xpath: @item_description_xpath,
38
- item_xpath: @item_xpath,
39
- item_link_xpath: @item_link_xpath,
40
- item_time_xpath: @item_time_xpath,
41
- item_title_xpath: @item_title_xpath,
37
+ item_description_selector: @item_description_selector,
38
+ item_selector: @item_selector,
39
+ item_link_selector: @item_link_selector,
40
+ item_time_selector: @item_time_selector,
41
+ item_title_selector: @item_title_selector,
42
42
  title: @title,
43
43
  url: @url
44
44
  )
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Weneedfeed
4
- VERSION = '0.3.0'
4
+ VERSION = '0.6.1'
5
5
  end
@@ -3,6 +3,8 @@
3
3
  module Weneedfeed
4
4
  module Views
5
5
  class ShowFeed < ::Hibana::View
6
+ include ::Weneedfeed::Helpers::Parameters
7
+
6
8
  # @param [Weneedfeed::Page] page
7
9
  def initialize(page:, **argv)
8
10
  super(**argv)
@@ -11,6 +13,11 @@ module Weneedfeed
11
13
 
12
14
  private
13
15
 
16
+ # @return [String]
17
+ def top_page_path
18
+ request.path.delete_suffix(router.path(:feed, page_id: page_id))
19
+ end
20
+
14
21
  # @return [Enumerable<Weneedfeed::Item>]
15
22
  def items
16
23
  @page.items.sort_by do |item|
@@ -19,8 +26,13 @@ module Weneedfeed
19
26
  end
20
27
 
21
28
  # @return [String]
22
- def page_name
23
- request.env['router.params'][:page_name]
29
+ def page_id
30
+ path_parameters[:page_id]
31
+ end
32
+
33
+ # @return [Hanami::Router]
34
+ def router
35
+ ::Weneedfeed::Application.router
24
36
  end
25
37
  end
26
38
  end
@@ -3,10 +3,10 @@
3
3
  module Weneedfeed
4
4
  module Views
5
5
  class ShowTopPage < ::Hibana::View
6
- # @param [Array<Weneedfeed::Page>] pages
7
- def initialize(pages:, **argv)
6
+ # @param [Array<Weneedfeed::PageSchema>] page_schemata
7
+ def initialize(page_schemata:, **argv)
8
8
  super(**argv)
9
- @pages = pages
9
+ @page_schemata = page_schemata
10
10
  end
11
11
 
12
12
  private
@@ -16,10 +16,10 @@ module Weneedfeed
16
16
  request.path.delete_suffix(router.path(:top_page))
17
17
  end
18
18
 
19
- # @param [String] page_name
19
+ # @param [String] page_id
20
20
  # @return [String]
21
- def feed_path(page_name:)
22
- "#{base_path}#{router.path(:feed, page_name: page_name)}"
21
+ def feed_path(page_id:)
22
+ "#{base_path}#{router.path(:feed, page_id: page_id)}"
23
23
  end
24
24
 
25
25
  # @return [Hanami::Router]
@@ -4,7 +4,7 @@
4
4
  xmlns:content="http://purl.org/rss/1.0/modules/content/">
5
5
  <channel>
6
6
  <title><![CDATA[<%= @page.title %>]]></title>
7
- <link><%= "#{request.base_url}#{request.path}" %></link>
7
+ <link><%= "#{request.base_url}#{top_page_path}" %></link>
8
8
  <atom:link href="<%= "#{request.base_url}#{request.path}" %>" rel="self"/>
9
9
  <description><![CDATA[Recent content on <%= @page.title %>]]></description>
10
10
  <lastBuildDate><%= Time.now.rfc822 %></lastBuildDate>
@@ -12,7 +12,9 @@
12
12
  <item>
13
13
  <title><![CDATA[<%= item.title %>]]></title>
14
14
  <link><%= item.link %></link>
15
- <pubDate><%= item.time.rfc822 %></pubDate>
15
+ <% if item.time %>
16
+ <pubDate><%= item.time.rfc822 %></pubDate>
17
+ <% end %>
16
18
  <description><![CDATA[<%= item.description %>]]></description>
17
19
  <content:encoded><![CDATA[<%= item.description %>]]></content:encoded>
18
20
  <guid isPermaLink="true"><%= item.link %></guid>
@@ -7,9 +7,9 @@
7
7
  </head>
8
8
  <body>
9
9
  <ul>
10
- <% @pages.each do |page_name, hash| %>
10
+ <% @page_schemata.each do |page_schema| %>
11
11
  <li>
12
- <a href="<%= feed_path(page_name: page_name) %>"><%= hash['title'] %></a>
12
+ <a href="<%= feed_path(page_id: page_schema.id) %>"><%= page_schema.title %></a>
13
13
  </li>
14
14
  <% end %>
15
15
  </ul>
@@ -8,7 +8,7 @@ Gem::Specification.new do |spec|
8
8
  spec.authors = ['Ryo Nakamura']
9
9
  spec.email = ['r7kamura@gmail.com']
10
10
 
11
- spec.summary = 'Generate feeds from URL and XPath.'
11
+ spec.summary = 'Generate feeds from URL and selectors.'
12
12
  spec.homepage = 'https://github.com/r7kamura/weneedfeed'
13
13
  spec.license = 'MIT'
14
14
  spec.required_ruby_version = Gem::Requirement.new('>= 2.5.0')
@@ -26,7 +26,7 @@ Gem::Specification.new do |spec|
26
26
  spec.require_paths = ['lib']
27
27
 
28
28
  spec.add_runtime_dependency 'faraday'
29
- spec.add_runtime_dependency 'hibana'
29
+ spec.add_runtime_dependency 'hibana', '>= 0.2'
30
30
  spec.add_runtime_dependency 'nokogiri'
31
31
  spec.add_runtime_dependency 'rack-capture', '>= 0.4.0'
32
32
  spec.add_runtime_dependency 'thor'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: weneedfeed
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.6.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ryo Nakamura
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-11-07 00:00:00.000000000 Z
11
+ date: 2020-11-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: faraday
@@ -30,14 +30,14 @@ dependencies:
30
30
  requirements:
31
31
  - - ">="
32
32
  - !ruby/object:Gem::Version
33
- version: '0'
33
+ version: '0.2'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - ">="
39
39
  - !ruby/object:Gem::Version
40
- version: '0'
40
+ version: '0.2'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: nokogiri
43
43
  requirement: !ruby/object:Gem::Requirement
@@ -106,10 +106,15 @@ files:
106
106
  - lib/weneedfeed/capture.rb
107
107
  - lib/weneedfeed/command.rb
108
108
  - lib/weneedfeed/controllers.rb
109
+ - lib/weneedfeed/controllers/base.rb
109
110
  - lib/weneedfeed/controllers/show_feed.rb
110
111
  - lib/weneedfeed/controllers/show_top_page.rb
112
+ - lib/weneedfeed/helpers.rb
113
+ - lib/weneedfeed/helpers/parameters.rb
111
114
  - lib/weneedfeed/item.rb
112
115
  - lib/weneedfeed/page.rb
116
+ - lib/weneedfeed/page_schema.rb
117
+ - lib/weneedfeed/schema.rb
113
118
  - lib/weneedfeed/scraping.rb
114
119
  - lib/weneedfeed/version.rb
115
120
  - lib/weneedfeed/views.rb
@@ -142,5 +147,5 @@ requirements: []
142
147
  rubygems_version: 3.1.2
143
148
  signing_key:
144
149
  specification_version: 4
145
- summary: Generate feeds from URL and XPath.
150
+ summary: Generate feeds from URL and selectors.
146
151
  test_files: []