weneedfeed 0.2.0 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b6a5d43bf327c8b96247ed37ce2c98ec546e7a339a8eb777dc647a3a594450a4
4
- data.tar.gz: e9ff73155c418c0989d29b0f3f4e4b4133ea5bbb33b92a349b492bddf5965f38
3
+ metadata.gz: 65814d68cfaf1936de388ce721e3aee49a0e4641c0ecd6ae2afb72c858fbd8f4
4
+ data.tar.gz: ed124803cfb9589dd3b8e44169454b07623a2460a7ac2412c1946d08e51d590f
5
5
  SHA512:
6
- metadata.gz: 122215c08d29522f980db52d10de20b9c718a17d7064081a277c3587454f5aedb534e2cf3b3818c1cd42fb89db016d1b59eb096ad91f5dd45e563c45d0971935
7
- data.tar.gz: 7f5ae31b297ad9b5dfad5053bb45baae7a3d844014f3269efed007103ff2146d5592d89984987c8a8dde0e13a4ce7845391e6a1215351e6f883f1c0342eb368e
6
+ metadata.gz: 486e7af49032ebf8124d6360cd835ec5ede778c4dc6911259fcee753247166ef6c53e3468bb3c386a3e2d794b3939685795c230a4ff24e65a67e615c5b39a800
7
+ data.tar.gz: 636f09f3860a1a7a16bf425facd29988e7037ae35a9ab8a11b8a37118339c9bdf750336eb01b8c36e2bee0d8b81ed2b1a9846269096428b702c09b27b873617a
@@ -5,16 +5,18 @@ on:
5
5
  push:
6
6
  branches:
7
7
  - master
8
+
8
9
  jobs:
9
10
  build:
10
11
  runs-on: ubuntu-18.04
11
12
  steps:
12
13
  - uses: actions/checkout@v2
13
- - uses: actions/setup-ruby@v1
14
+ - uses: ruby/setup-ruby@v1
14
15
  with:
15
- ruby-version: '2.7.2'
16
- - run: bundle install --jobs=$(($(nproc) - 1)) --retry=3
17
- - run: bundle exec rubocop --color --parallel
16
+ bundler-cache: true
17
+ ruby-version: 2.7.2
18
+ - uses: r7kamura/rubocop-problem-matchers-action@v1
19
+ - run: bundle exec rubocop --parallel
18
20
  - run: bundle exec rspec --force-color
19
21
 
20
22
 
data/.rspec CHANGED
@@ -1 +1,2 @@
1
+ --format doc
1
2
  --require spec_helper
@@ -1,3 +1,6 @@
1
+ AllCops:
2
+ TargetRubyVersion: 2.5
3
+
1
4
  Lint/SuppressedException:
2
5
  Enabled: false
3
6
 
@@ -6,3 +9,6 @@ Metrics:
6
9
 
7
10
  Style/Documentation:
8
11
  Enabled: false
12
+
13
+ Style/TrailingCommaInArguments:
14
+ Enabled: false
@@ -7,6 +7,61 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## Unreleased
9
9
 
10
+ ## 0.6.0 - 2020-11-15
11
+
12
+ ### Changed
13
+
14
+ - Change pages schema from Hash to Array.
15
+ - Require hanami-router 2.0.0.alpha3 (pinned to exactly this version).
16
+ - Ignore pubDate when item.time is not found.
17
+ - Make item_description_selector and item_time_selector optional.
18
+
19
+ ## 0.5.0 - 2020-11-14
20
+
21
+ ### Added
22
+
23
+ - Add datetime attribute support on time element.
24
+
25
+ ### Changed
26
+
27
+ - Change schema format about selectors.
28
+
29
+ ## 0.4.1 - 2020-11-08
30
+
31
+ ### Fixed
32
+
33
+ - Fix error when item description is not found.
34
+
35
+ ## 0.4.0 - 2020-11-08
36
+
37
+ ### Added
38
+
39
+ - Add CSS selector support.
40
+
41
+ ### Changed
42
+
43
+ - Change schema key from `xpath` to `selectors`.
44
+ - Change item_link_selector target from href attribute to a element.
45
+ - Change some argument names from `_xpath` to `_selector`.
46
+ - Change `weneedfeed build` description.
47
+ - Change `weneedfeed server` description.
48
+ - Change channel link from feed URL to top page URL.
49
+ - Sort top page feeds by their titles.
50
+
51
+ ## 0.3.0 - 2020-11-08
52
+
53
+ ### Added
54
+
55
+ - Add channel description.
56
+ - Add item description.
57
+ - Add `weneedfeed build` command.
58
+ - Add `weneedfeed server` command.
59
+
60
+ ### Changed
61
+
62
+ - Surround channel title by CDATA.
63
+ - Change channel children order.
64
+
10
65
  ## 0.2.0 - 2020-11-07
11
66
 
12
67
  ### Changed
@@ -1,11 +1,13 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- weneedfeed (0.1.0)
4
+ weneedfeed (0.6.0)
5
5
  faraday
6
+ hanami-router (= 2.0.0.alpha3)
6
7
  hibana
7
8
  nokogiri
8
9
  rack-capture (>= 0.4.0)
10
+ thor
9
11
 
10
12
  GEM
11
13
  remote: https://rubygems.org/
@@ -13,29 +15,28 @@ GEM
13
15
  addressable (2.7.0)
14
16
  public_suffix (>= 2.0.2, < 5.0)
15
17
  ast (2.4.1)
16
- concurrent-ruby (1.1.7)
17
18
  crack (0.4.4)
18
19
  diff-lcs (1.4.4)
19
20
  faraday (1.1.0)
20
21
  multipart-post (>= 1.2, < 3)
21
22
  ruby2_keywords
22
- hanami-router (1.3.2)
23
- hanami-utils (~> 1.3)
24
- http_router (= 0.11.2)
23
+ hanami-router (2.0.0.alpha3)
24
+ mustermann (~> 1.0)
25
+ mustermann-contrib (~> 1.0)
25
26
  rack (~> 2.0)
26
- hanami-utils (1.3.6)
27
- concurrent-ruby (~> 1.0)
28
- transproc (~> 1.0)
27
+ hansi (0.2.0)
29
28
  hashdiff (1.0.1)
30
29
  hibana (0.1.1)
31
30
  hanami-router
32
31
  rack
33
32
  tilt
34
- http_router (0.11.2)
35
- rack (>= 1.0.0)
36
- url_mount (~> 0.2.1)
37
33
  mini_portile2 (2.4.0)
38
34
  multipart-post (2.1.1)
35
+ mustermann (1.1.1)
36
+ ruby2_keywords (~> 0.0.1)
37
+ mustermann-contrib (1.1.1)
38
+ hansi (~> 0.2.0)
39
+ mustermann (= 1.1.1)
39
40
  nokogiri (1.10.10)
40
41
  mini_portile2 (~> 2.4.0)
41
42
  parallel (1.19.2)
@@ -77,11 +78,9 @@ GEM
77
78
  parser (>= 2.7.1.5)
78
79
  ruby-progressbar (1.10.1)
79
80
  ruby2_keywords (0.0.2)
81
+ thor (1.0.1)
80
82
  tilt (2.0.10)
81
- transproc (1.1.1)
82
83
  unicode-display_width (1.7.0)
83
- url_mount (0.2.1)
84
- rack
85
84
  webmock (3.9.3)
86
85
  addressable (>= 2.3.6)
87
86
  crack (>= 0.3.2)
data/README.md CHANGED
@@ -3,7 +3,7 @@
3
3
  [![](https://badge.fury.io/rb/weneedfeed.svg)](https://rubygems.org/gems/weneedfeed)
4
4
  [![](https://github.com/r7kamura/weneedfeed/workflows/test/badge.svg)](https://github.com/r7kamura/weneedfeed/actions?query=workflow%3Atest)
5
5
 
6
- Generate feeds from URL and XPath.
6
+ Generate feeds from URL and selectors.
7
7
 
8
8
  ## Installation
9
9
 
@@ -25,38 +25,124 @@ Or install it yourself as:
25
25
  gem install weneedfeed
26
26
  ```
27
27
 
28
- ## Usage
28
+ ## Schema
29
+
30
+ You need to write a schema file to use this gem. It is named `weneedfeed.yml` by default; pass `--schema-path` to use another path.
29
31
 
30
- Write schema:
32
+ ### Example
31
33
 
32
34
  ```yaml
33
35
  pages:
34
- example:
35
- title: example name
36
- url: http://example.com/
37
- xpath:
38
- item: //li
39
- item_description: .//p[3]
40
- item_link: .//a/@href
41
- item_time: .//time/@datetime
42
- item_title: .//p[2]
36
+ - id: example1
37
+ title: Example feed with CSS Selector
38
+ url: http://example.com/1
39
+ item_selector: li
40
+ item_description_selector: p:nth-child(3)
41
+ item_link_selector: a
42
+ item_time_selector: time
43
+ item_title_selector: p:nth-child(2)
44
+ - id: example2
45
+ title: Example feed with XPath
46
+ url: http://example.com/2
47
+ item_selector: //li
48
+ item_description_selector: .//p[3]
49
+ item_link_selector: .//a
50
+ item_time_selector: .//time
51
+ item_title_selector: .//p[2]
43
52
  ```
44
53
 
45
- And then call `Weneedfeed::Capture`:
54
+ ### `id`
46
55
 
47
- ```ruby
48
- require 'weneedfeed'
56
+ Feed ID.
57
+
58
+ - required
59
+ - Used for feed URL.
60
+
61
+ ### `title`
62
+
63
+ Feed title.
64
+
65
+ - required
66
+ - Used for RSS `<title>` element in `<channel>` element.
67
+
68
+ ### `url`
69
+
70
+ HTML source URL.
71
+
72
+ - required
73
+ - Used to fetch HTML page for building feed.
74
+
75
+ ### `item_selector`
76
+
77
+ CSS or XPath selector to search each item.
78
+
79
+ - required
80
+ - Equivalent unit to RSS `<item>` element.
81
+
82
+ ### `item_link_selector`
83
+
84
+ CSS or XPath selector to find `<a>` element in each item.
85
+
86
+ - required
87
+ - Used for `<link>` in `<item>`. The element's `href` attribute is resolved against the page `url`.
88
+
89
+ ### `item_title_selector`
90
+
91
+ CSS or XPath selector to find element with title information in each item.
92
+
93
+ - required
94
+ - Used for `<title>` in `<item>`.
95
+
96
+ ### `item_description_selector`
97
+
98
+ CSS or XPath selector to find element with description information in each item.
99
+
100
+ - optional
101
+ - Used for `<description>` in `<item>`.
102
+
103
+ ### `item_time_selector`
104
+
105
+ CSS or XPath selector to find element with datetime information in each item.
106
+
107
+ - optional
108
+ - Used for `<pubDate>` in `<item>`. The element's `datetime` attribute is used when present; otherwise its inner HTML is parsed as the datetime.
109
+
110
+ ## Usage
111
+
112
+ ### Build
113
+
114
+ Run `weneedfeed build` to build static files.
115
+
116
+ ```
117
+ Usage:
118
+ weneedfeed build --base-url=BASE_URL
119
+
120
+ Options:
121
+ --base-url=BASE_URL # Base URL where to locate built files. (e.g. `"https://user.github.io/repo"`)
122
+ [--schema-path=SCHEMA_PATH] # Path to weneedfeed YAML schema file.
123
+ # Default: weneedfeed.yml
124
+
125
+ Build static files.
126
+ ```
127
+
128
+ ### Server
129
+
130
+ Run `weneedfeed server` to run HTTP server.
131
+
132
+ ```
133
+ Usage:
134
+ weneedfeed server
135
+
136
+ Options:
137
+ [--schema-path=SCHEMA_PATH] # Path to weneedfeed YAML schema file.
138
+ # Default: weneedfeed.yml
49
139
 
50
- Weneedfeed::Capture.call(
51
- base_url: 'https://user.github.io/repo',
52
- schema_path: 'schema.yml'
53
- )
140
+ Run HTTP server.
54
141
  ```
55
142
 
56
- These files will be generated:
143
+ ## GitHub Actions Integration
57
144
 
58
- - output/index.html
59
- - output/feeds/example.xml
145
+ Use [weneedfeed-action](https://github.com/r7kamura/weneedfeed-action) for invoking weneedfeed on GitHub Actions.
60
146
 
61
147
  ## Development
62
148
 
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require 'weneedfeed'
5
+
6
+ Weneedfeed::Command.start(ARGV)
@@ -5,9 +5,13 @@ require 'weneedfeed/version'
5
5
  module Weneedfeed
6
6
  autoload :Application, 'weneedfeed/application'
7
7
  autoload :Capture, 'weneedfeed/capture'
8
+ autoload :Command, 'weneedfeed/command'
8
9
  autoload :Controllers, 'weneedfeed/controllers'
10
+ autoload :Helpers, 'weneedfeed/helpers'
9
11
  autoload :Item, 'weneedfeed/item'
10
12
  autoload :Page, 'weneedfeed/page'
13
+ autoload :PageSchema, 'weneedfeed/page_schema'
14
+ autoload :Schema, 'weneedfeed/schema'
11
15
  autoload :Scraping, 'weneedfeed/scraping'
12
16
  autoload :Views, 'weneedfeed/views'
13
17
  end
@@ -6,12 +6,12 @@ module Weneedfeed
6
6
  class Application < ::Hibana::Application
7
7
  route do
8
8
  get '/', to: ::Weneedfeed::Controllers::ShowTopPage, as: :top_page
9
- get '/feeds/:page_name.xml', to: ::Weneedfeed::Controllers::ShowFeed, as: :feed
9
+ get '/feeds/:page_id.xml', to: ::Weneedfeed::Controllers::ShowFeed, as: :feed
10
10
  end
11
11
 
12
12
  # @param [Hash] schema
13
13
  def initialize(schema:)
14
- @schema = schema
14
+ @schema = ::Weneedfeed::Schema.new(schema)
15
15
  super()
16
16
  end
17
17
 
@@ -23,8 +23,8 @@ module Weneedfeed
23
23
 
24
24
  # @return [Array<String>]
25
25
  def paths
26
- ['/'] + @schema['pages'].keys.map do |page_name|
27
- "/feeds/#{page_name}.xml"
26
+ ['/'] + @schema.page_ids.map do |page_id|
27
+ "/feeds/#{page_id}.xml"
28
28
  end
29
29
  end
30
30
  end
@@ -0,0 +1,63 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rack'
4
+ require 'thor'
5
+ require 'yaml'
6
+
7
+ module Weneedfeed
8
+ class Command < ::Thor
9
+ class << self
10
+ # @note Override for thor breaking change.
11
+ # See https://github.com/erikhuda/thor/issues/244.
12
+ def exit_on_failure?
13
+ true
14
+ end
15
+ end
16
+
17
+ desc(
18
+ 'build',
19
+ 'Build static files.'
20
+ )
21
+
22
+ method_option(
23
+ :base_url,
24
+ desc: 'Base URL where to locate built files. (e.g. `"https://user.github.io/repo"`)',
25
+ required: true,
26
+ type: :string
27
+ )
28
+
29
+ method_option(
30
+ :schema_path,
31
+ default: 'weneedfeed.yml',
32
+ desc: 'Path to weneedfeed YAML schema file.',
33
+ type: :string
34
+ )
35
+
36
+ # @param [String] base_url
37
+ # @param [String] schema_path
38
+ def build
39
+ ::Weneedfeed::Capture.call(
40
+ base_url: options[:base_url],
41
+ schema_path: options[:schema_path]
42
+ )
43
+ end
44
+
45
+ desc(
46
+ 'server',
47
+ 'Run HTTP server.'
48
+ )
49
+
50
+ method_option(
51
+ :schema_path,
52
+ default: 'weneedfeed.yml',
53
+ desc: 'Path to weneedfeed YAML schema file.',
54
+ type: :string
55
+ )
56
+
57
+ def server
58
+ schema = ::YAML.load_file(options[:schema_path])
59
+ application = Weneedfeed::Application.new(schema: schema)
60
+ ::Rack::Handler.default.run(application)
61
+ end
62
+ end
63
+ end
@@ -2,6 +2,7 @@
2
2
 
3
3
  module Weneedfeed
4
4
  module Controllers
5
+ autoload :Base, 'weneedfeed/controllers/base'
5
6
  autoload :ShowFeed, 'weneedfeed/controllers/show_feed'
6
7
  autoload :ShowTopPage, 'weneedfeed/controllers/show_top_page'
7
8
  end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Weneedfeed
4
+ module Controllers
5
+ class Base < ::Hibana::Controller
6
+ include ::Weneedfeed::Helpers::Parameters
7
+
8
+ private
9
+
10
+ # @return [Weneedfeed::Schema]
11
+ def schema
12
+ request.env['weneedfeed.schema']
13
+ end
14
+ end
15
+ end
16
+ end
@@ -2,31 +2,22 @@
2
2
 
3
3
  module Weneedfeed
4
4
  module Controllers
5
- class ShowFeed < ::Hibana::Controller
5
+ class ShowFeed < ::Weneedfeed::Controllers::Base
6
6
  def call
7
- env = request.env
8
- page_name = env.dig(
9
- 'router.params',
10
- :page_name
11
- )
12
- properties = env.dig(
13
- 'weneedfeed.schema',
14
- 'pages',
15
- page_name
16
- )
17
- unless properties
7
+ page_schema = schema.find_page_schema(path_parameters[:page_id])
8
+ unless page_schema
18
9
  response.status = 404
19
10
  return
20
11
  end
21
12
 
22
13
  scraping = ::Weneedfeed::Scraping.new(
23
- item_description_xpath: properties['xpath']['item_description'],
24
- item_link_xpath: properties['xpath']['item_link'],
25
- item_time_xpath: properties['xpath']['item_time'],
26
- item_title_xpath: properties['xpath']['item_title'],
27
- item_xpath: properties['xpath']['item'],
28
- title: properties['title'],
29
- url: properties['url']
14
+ item_description_selector: page_schema.item_description_selector,
15
+ item_link_selector: page_schema.item_link_selector,
16
+ item_time_selector: page_schema.item_time_selector,
17
+ item_title_selector: page_schema.item_title_selector,
18
+ item_selector: page_schema.item_selector,
19
+ title: page_schema.title,
20
+ url: page_schema.url,
30
21
  )
31
22
  page = scraping.call
32
23
 
@@ -2,16 +2,13 @@
2
2
 
3
3
  module Weneedfeed
4
4
  module Controllers
5
- class ShowTopPage < ::Hibana::Controller
5
+ class ShowTopPage < ::Weneedfeed::Controllers::Base
6
6
  def call
7
- pages = request.env.dig(
8
- 'weneedfeed.schema',
9
- 'pages'
10
- )
7
+ page_schemata = schema.page_schemata.sort_by(&:title)
11
8
  response.content_type = 'text/html'
12
9
  response.write(
13
10
  ::Weneedfeed::Views::ShowTopPage.new(
14
- pages: pages,
11
+ page_schemata: page_schemata,
15
12
  partial_template_path: ::File.expand_path(
16
13
  'templates/show_top_page.html.erb',
17
14
  "#{__dir__}/../../.."
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Weneedfeed
4
+ module Helpers
5
+ autoload :Parameters, 'weneedfeed/helpers/parameters'
6
+ end
7
+ end
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Weneedfeed
4
+ module Helpers
5
+ module Parameters
6
+ private
7
+
8
+ # @return [Hash]
9
+ def path_parameters
10
+ request.env['router.params']
11
+ end
12
+ end
13
+ end
14
+ end
@@ -15,56 +15,71 @@ module Weneedfeed
15
15
  end
16
16
  end
17
17
 
18
- # @param [String] description_xpath
19
- # @param [String] link_xpath
18
+ # @param [String, nil] description_selector
19
+ # @param [String, nil] link_selector
20
20
  # @param [Nokogiri::Node] node
21
- # @param [String] time_xpath
22
- # @param [String] title_xpath
21
+ # @param [String, nil] time_selector
22
+ # @param [String] title_selector
23
23
  # @param [String] url
24
24
  def initialize(
25
- description_xpath:,
26
- link_xpath:,
25
+ description_selector:,
26
+ link_selector:,
27
27
  node:,
28
- time_xpath:,
29
- title_xpath:,
28
+ time_selector:,
29
+ title_selector:,
30
30
  url:
31
31
  )
32
- @description_xpath = description_xpath
33
- @link_xpath = link_xpath
32
+ @description_selector = description_selector
33
+ @link_selector = link_selector
34
34
  @node = node
35
- @time_xpath = time_xpath
36
- @title_xpath = title_xpath
35
+ @time_selector = time_selector
36
+ @title_selector = title_selector
37
37
  @url = url
38
38
  end
39
39
 
40
40
  # @return [String, nil]
41
41
  def description
42
- @node.xpath(@description_xpath).inner_html
42
+ return unless @description_selector
43
+
44
+ @node.at(@description_selector)&.inner_html
43
45
  end
44
46
 
45
47
  # @return [String]
46
48
  def link
47
49
  ::URI.join(
48
50
  @url,
49
- @node.xpath(@link_xpath).inner_html
51
+ @node.at(@link_selector)['href']
50
52
  ).to_s
51
53
  end
52
54
 
53
55
  # @return [Time, nil]
54
56
  def time
55
- self.class.parse_time(time_string)
57
+ return unless @time_selector
58
+
59
+ string = time_string
60
+ return unless string
61
+
62
+ self.class.parse_time(string)
56
63
  end
57
64
 
58
- # @return [String, nil]
65
+ # @return [String]
59
66
  def title
60
- @node.xpath(@title_xpath).inner_text
67
+ @node.at(@title_selector).inner_text
61
68
  end
62
69
 
63
70
  private
64
71
 
65
- # @return [String]
72
+ # @return [Nokogiri::Node, nil]
73
+ def time_node
74
+ @node.at(@time_selector)
75
+ end
76
+
77
+ # @return [String, nil]
66
78
  def time_string
67
- @node.xpath(@time_xpath).inner_html
79
+ node = time_node
80
+ return unless node
81
+
82
+ node['datetime'] || node.inner_html
68
83
  end
69
84
  end
70
85
  end
@@ -8,29 +8,29 @@ module Weneedfeed
8
8
  # @return [String]
9
9
  attr_reader :url
10
10
 
11
- # @param [String] item_description_xpath
12
- # @param [String] item_link_xpath
13
- # @param [String] item_time_xpath
14
- # @param [String] item_title_xpath
15
- # @param [String] item_xpath
11
+ # @param [String, nil] item_description_selector
12
+ # @param [String] item_link_selector
13
+ # @param [String, nil] item_time_selector
14
+ # @param [String] item_title_selector
15
+ # @param [String] item_selector
16
16
  # @param [Nokogiri::Node] node
17
17
  # @param [String] title
18
18
  # @param [String] url
19
19
  def initialize(
20
- item_description_xpath:,
21
- item_link_xpath:,
22
- item_time_xpath:,
23
- item_title_xpath:,
24
- item_xpath:,
20
+ item_description_selector:,
21
+ item_link_selector:,
22
+ item_time_selector:,
23
+ item_title_selector:,
24
+ item_selector:,
25
25
  node:,
26
26
  title:,
27
27
  url:
28
28
  )
29
- @item_description_xpath = item_description_xpath
30
- @item_link_xpath = item_link_xpath
31
- @item_time_xpath = item_time_xpath
32
- @item_title_xpath = item_title_xpath
33
- @item_xpath = item_xpath
29
+ @item_description_selector = item_description_selector
30
+ @item_link_selector = item_link_selector
31
+ @item_time_selector = item_time_selector
32
+ @item_title_selector = item_title_selector
33
+ @item_selector = item_selector
34
34
  @node = node
35
35
  @title = title
36
36
  @url = url
@@ -38,13 +38,13 @@ module Weneedfeed
38
38
 
39
39
  # @return [Array<Weneedfeed::Item>]
40
40
  def items
41
- @node.xpath(@item_xpath).map do |node|
41
+ @node.search(@item_selector).map do |node|
42
42
  ::Weneedfeed::Item.new(
43
- description_xpath: @item_description_xpath,
44
- link_xpath: @item_link_xpath,
43
+ description_selector: @item_description_selector,
44
+ link_selector: @item_link_selector,
45
45
  node: node,
46
- time_xpath: @item_time_xpath,
47
- title_xpath: @item_title_xpath,
46
+ time_selector: @item_time_selector,
47
+ title_selector: @item_title_selector,
48
48
  url: @url
49
49
  )
50
50
  end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Weneedfeed
4
+ PageSchema = Struct.new(
5
+ :id,
6
+ :item_description_selector,
7
+ :item_link_selector,
8
+ :item_time_selector,
9
+ :item_title_selector,
10
+ :item_selector,
11
+ :title,
12
+ :url,
13
+ keyword_init: true,
14
+ )
15
+ end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Weneedfeed
4
+ class Schema
5
+ # @param [Hash] raw
6
+ def initialize(raw)
7
+ @raw = raw
8
+ end
9
+
10
+ # @param [String, nil] page_schema_id
11
+ # @return [Weneedfeed::PageSchema, nil]
12
+ def find_page_schema(page_schema_id)
13
+ page_schemata.find do |page_schema|
14
+ page_schema.id == page_schema_id
15
+ end
16
+ end
17
+
18
+ # @return [Array<String>]
19
+ def page_ids
20
+ page_schemata.map(&:id)
21
+ end
22
+
23
+ # @return [Array<Weneedfeed::PageSchema>]
24
+ def page_schemata
25
+ @raw['pages'].map do |hash|
26
+ ::Weneedfeed::PageSchema.new(
27
+ id: hash['id'],
28
+ item_link_selector: hash['item_link_selector'],
29
+ item_time_selector: hash['item_time_selector'],
30
+ item_title_selector: hash['item_title_selector'],
31
+ item_selector: hash['item_selector'],
32
+ title: hash['title'],
33
+ url: hash['url'],
34
+ )
35
+ end
36
+ end
37
+ end
38
+ end
@@ -5,27 +5,27 @@ require 'nokogiri'
5
5
 
6
6
  module Weneedfeed
7
7
  class Scraping
8
- # @param [String] item_description_xpath
9
- # @param [String] item_link_xpath
10
- # @param [String] item_time_xpath
11
- # @param [String] item_title_xpath
12
- # @param [String] item_xpath
8
+ # @param [String, nil] item_description_selector
9
+ # @param [String] item_link_selector
10
+ # @param [String, nil] item_time_selector
11
+ # @param [String] item_title_selector
12
+ # @param [String] item_selector
13
13
  # @param [String] title
14
14
  # @param [String] url
15
15
  def initialize(
16
- item_description_xpath:,
17
- item_link_xpath:,
18
- item_time_xpath:,
19
- item_title_xpath:,
20
- item_xpath:,
16
+ item_description_selector:,
17
+ item_link_selector:,
18
+ item_time_selector:,
19
+ item_title_selector:,
20
+ item_selector:,
21
21
  title:,
22
22
  url:
23
23
  )
24
- @item_description_xpath = item_description_xpath
25
- @item_link_xpath = item_link_xpath
26
- @item_time_xpath = item_time_xpath
27
- @item_title_xpath = item_title_xpath
28
- @item_xpath = item_xpath
24
+ @item_description_selector = item_description_selector
25
+ @item_link_selector = item_link_selector
26
+ @item_time_selector = item_time_selector
27
+ @item_title_selector = item_title_selector
28
+ @item_selector = item_selector
29
29
  @title = title
30
30
  @url = url
31
31
  end
@@ -34,11 +34,11 @@ module Weneedfeed
34
34
  def call
35
35
  ::Weneedfeed::Page.new(
36
36
  node: parsed_body,
37
- item_description_xpath: @item_description_xpath,
38
- item_xpath: @item_xpath,
39
- item_link_xpath: @item_link_xpath,
40
- item_time_xpath: @item_time_xpath,
41
- item_title_xpath: @item_title_xpath,
37
+ item_description_selector: @item_description_selector,
38
+ item_selector: @item_selector,
39
+ item_link_selector: @item_link_selector,
40
+ item_time_selector: @item_time_selector,
41
+ item_title_selector: @item_title_selector,
42
42
  title: @title,
43
43
  url: @url
44
44
  )
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Weneedfeed
4
- VERSION = '0.2.0'
4
+ VERSION = '0.6.0'
5
5
  end
@@ -3,6 +3,8 @@
3
3
  module Weneedfeed
4
4
  module Views
5
5
  class ShowFeed < ::Hibana::View
6
+ include ::Weneedfeed::Helpers::Parameters
7
+
6
8
  # @param [Weneedfeed::Page] page
7
9
  def initialize(page:, **argv)
8
10
  super(**argv)
@@ -11,6 +13,11 @@ module Weneedfeed
11
13
 
12
14
  private
13
15
 
16
+ # @return [String]
17
+ def top_page_path
18
+ request.path.delete_suffix(router.path(:feed, page_id: page_id))
19
+ end
20
+
14
21
  # @return [Enumerable<Weneedfeed::Item>]
15
22
  def items
16
23
  @page.items.sort_by do |item|
@@ -19,8 +26,13 @@ module Weneedfeed
19
26
  end
20
27
 
21
28
  # @return [String]
22
- def page_name
23
- request.env['router.params'][:page_name]
29
+ def page_id
30
+ path_parameters[:page_id]
31
+ end
32
+
33
+ # @return [Hanami::Router]
34
+ def router
35
+ ::Weneedfeed::Application.router
24
36
  end
25
37
  end
26
38
  end
@@ -3,10 +3,10 @@
3
3
  module Weneedfeed
4
4
  module Views
5
5
  class ShowTopPage < ::Hibana::View
6
- # @param [Array<Weneedfeed::Page>] pages
7
- def initialize(pages:, **argv)
6
+ # @param [Array<Weneedfeed::PageSchema>] page_schemata
7
+ def initialize(page_schemata:, **argv)
8
8
  super(**argv)
9
- @pages = pages
9
+ @page_schemata = page_schemata
10
10
  end
11
11
 
12
12
  private
@@ -16,10 +16,10 @@ module Weneedfeed
16
16
  request.path.delete_suffix(router.path(:top_page))
17
17
  end
18
18
 
19
- # @param [String] page_name
19
+ # @param [String] page_id
20
20
  # @return [String]
21
- def feed_path(page_name:)
22
- "#{base_path}#{router.path(:feed, page_name: page_name)}"
21
+ def feed_path(page_id:)
22
+ "#{base_path}#{router.path(:feed, page_id: page_id)}"
23
23
  end
24
24
 
25
25
  # @return [Hanami::Router]
@@ -3,16 +3,19 @@
3
3
  xmlns:atom="http://www.w3.org/2005/Atom"
4
4
  xmlns:content="http://purl.org/rss/1.0/modules/content/">
5
5
  <channel>
6
- <description></description>
7
- <link><%= "#{request.base_url}#{request.path}" %></link>
6
+ <title><![CDATA[<%= @page.title %>]]></title>
7
+ <link><%= "#{request.base_url}#{top_page_path}" %></link>
8
8
  <atom:link href="<%= "#{request.base_url}#{request.path}" %>" rel="self"/>
9
- <title><%= @page.title %></title>
9
+ <description><![CDATA[Recent content on <%= @page.title %>]]></description>
10
10
  <lastBuildDate><%= Time.now.rfc822 %></lastBuildDate>
11
11
  <% items.each do |item| %>
12
12
  <item>
13
13
  <title><![CDATA[<%= item.title %>]]></title>
14
14
  <link><%= item.link %></link>
15
- <pubDate><%= item.time.rfc822 %></pubDate>
15
+ <% if item.time %>
16
+ <pubDate><%= item.time.rfc822 %></pubDate>
17
+ <% end %>
18
+ <description><![CDATA[<%= item.description %>]]></description>
16
19
  <content:encoded><![CDATA[<%= item.description %>]]></content:encoded>
17
20
  <guid isPermaLink="true"><%= item.link %></guid>
18
21
  </item>
@@ -7,9 +7,9 @@
7
7
  </head>
8
8
  <body>
9
9
  <ul>
10
- <% @pages.each do |page_name, hash| %>
10
+ <% @page_schemata.each do |page_schema| %>
11
11
  <li>
12
- <a href="<%= feed_path(page_name: page_name) %>"><%= hash['title'] %></a>
12
+ <a href="<%= feed_path(page_id: page_schema.id) %>"><%= page_schema.title %></a>
13
13
  </li>
14
14
  <% end %>
15
15
  </ul>
@@ -8,7 +8,7 @@ Gem::Specification.new do |spec|
8
8
  spec.authors = ['Ryo Nakamura']
9
9
  spec.email = ['r7kamura@gmail.com']
10
10
 
11
- spec.summary = 'Generate feeds from URL and XPath.'
11
+ spec.summary = 'Generate feeds from URL and selectors.'
12
12
  spec.homepage = 'https://github.com/r7kamura/weneedfeed'
13
13
  spec.license = 'MIT'
14
14
  spec.required_ruby_version = Gem::Requirement.new('>= 2.5.0')
@@ -21,10 +21,14 @@ Gem::Specification.new do |spec|
21
21
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
22
22
  `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
23
23
  end
24
+ spec.bindir = 'exe'
25
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
24
26
  spec.require_paths = ['lib']
25
27
 
26
28
  spec.add_runtime_dependency 'faraday'
29
+ spec.add_runtime_dependency 'hanami-router', '2.0.0.alpha3'
27
30
  spec.add_runtime_dependency 'hibana'
28
31
  spec.add_runtime_dependency 'nokogiri'
29
32
  spec.add_runtime_dependency 'rack-capture', '>= 0.4.0'
33
+ spec.add_runtime_dependency 'thor'
30
34
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: weneedfeed
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ryo Nakamura
8
8
  autorequire:
9
- bindir: bin
9
+ bindir: exe
10
10
  cert_chain: []
11
- date: 2020-11-07 00:00:00.000000000 Z
11
+ date: 2020-11-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: faraday
@@ -24,6 +24,20 @@ dependencies:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: hanami-router
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '='
32
+ - !ruby/object:Gem::Version
33
+ version: 2.0.0.alpha3
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '='
39
+ - !ruby/object:Gem::Version
40
+ version: 2.0.0.alpha3
27
41
  - !ruby/object:Gem::Dependency
28
42
  name: hibana
29
43
  requirement: !ruby/object:Gem::Requirement
@@ -66,10 +80,25 @@ dependencies:
66
80
  - - ">="
67
81
  - !ruby/object:Gem::Version
68
82
  version: 0.4.0
83
+ - !ruby/object:Gem::Dependency
84
+ name: thor
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
69
97
  description:
70
98
  email:
71
99
  - r7kamura@gmail.com
72
- executables: []
100
+ executables:
101
+ - weneedfeed
73
102
  extensions: []
74
103
  extra_rdoc_files: []
75
104
  files:
@@ -85,14 +114,21 @@ files:
85
114
  - Rakefile
86
115
  - bin/console
87
116
  - bin/setup
117
+ - exe/weneedfeed
88
118
  - lib/weneedfeed.rb
89
119
  - lib/weneedfeed/application.rb
90
120
  - lib/weneedfeed/capture.rb
121
+ - lib/weneedfeed/command.rb
91
122
  - lib/weneedfeed/controllers.rb
123
+ - lib/weneedfeed/controllers/base.rb
92
124
  - lib/weneedfeed/controllers/show_feed.rb
93
125
  - lib/weneedfeed/controllers/show_top_page.rb
126
+ - lib/weneedfeed/helpers.rb
127
+ - lib/weneedfeed/helpers/parameters.rb
94
128
  - lib/weneedfeed/item.rb
95
129
  - lib/weneedfeed/page.rb
130
+ - lib/weneedfeed/page_schema.rb
131
+ - lib/weneedfeed/schema.rb
96
132
  - lib/weneedfeed/scraping.rb
97
133
  - lib/weneedfeed/version.rb
98
134
  - lib/weneedfeed/views.rb
@@ -125,5 +161,5 @@ requirements: []
125
161
  rubygems_version: 3.1.2
126
162
  signing_key:
127
163
  specification_version: 4
128
- summary: Generate feeds from URL and XPath.
164
+ summary: Generate feeds from URL and selectors.
129
165
  test_files: []