weneedfeed 0.3.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b201152f3a5b05f2c336caf0ba02416eea09da02834a46818dc00c86596ee0d3
4
- data.tar.gz: 0fc63fe70919ac5284297c309545d0d1ea3a3253166d36f434bb6b598aa92721
3
+ metadata.gz: adfe67e7ae534a35ab0e8c7ae33272c9046187564304700e7b6eec52dd671d03
4
+ data.tar.gz: f4081b96371d71f0833ae6bf16e63d2719e4d341c9d23f8e17ab543f538ba6ac
5
5
  SHA512:
6
- metadata.gz: a318105d868fc4ca2b91a03541feffde8bb1af257b4c3126716c2988f21703f7ee87699183ab0d8f3ab67d111008af9485ee4e219d464ef618e459b5a8b8fc1e
7
- data.tar.gz: 726e20a19227146a81a309b32837396753a1fe572fe1ec6fd8ce44767dab61ebc337ccaa5af53d8d1449ba987bcefd76d6486db3988f4c95bd8d316f2440fd57
6
+ metadata.gz: e58d4b58d0813ecdbe5c995acbf8e4a2b55c3598fb81967b0a169fcdb72ebc7ee6717bd39fcc95c043b88345cb6396f2221aa507a0cae2a0a07b1936f3442751
7
+ data.tar.gz: b521aba54d499732ec09fc93de7d77ed642de53aaf828f49683b5f66c3616c781ed79675fbfb0df7a6aca4854897f57c92d2da09c0ad0f375bab12f71cdff7dd
@@ -7,6 +7,22 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## Unreleased
9
9
 
10
+ ## 0.4.0 - 2020-11-08
11
+
12
+ ### Added
13
+
14
+ - Add CSS selector support.
15
+
16
+ ### Changed
17
+
18
+ - Change schema key from `xpath` to `selectors`.
19
+ - Change item_link_selector target from href attribute to `a` element.
20
+ - Change some argument names from `_xpath` to `_selector`.
21
+ - Change `weneedfeed build` description.
22
+ - Change `weneedfeed server` description.
23
+ - Change channel link from feed URL to top page URL.
24
+ - Sort top page feeds by their titles.
25
+
10
26
  ## 0.3.0 - 2020-11-08
11
27
 
12
28
  ### Added
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- weneedfeed (0.3.0)
4
+ weneedfeed (0.4.0)
5
5
  faraday
6
6
  hibana
7
7
  nokogiri
data/README.md CHANGED
@@ -3,7 +3,7 @@
3
3
  [![](https://badge.fury.io/rb/weneedfeed.svg)](https://rubygems.org/gems/weneedfeed)
4
4
  [![](https://github.com/r7kamura/weneedfeed/workflows/test/badge.svg)](https://github.com/r7kamura/weneedfeed/actions?query=workflow%3Atest)
5
5
 
6
- Generate feeds from URL and XPath.
6
+ Generate feeds from URL and selectors.
7
7
 
8
8
  ## Installation
9
9
 
@@ -27,14 +27,25 @@ gem install weneedfeed
27
27
 
28
28
  ## Usage
29
29
 
30
- Write schema:
30
+ ### Schema
31
+
32
+ Write `weneedfeed.yml`.
31
33
 
32
34
  ```yaml
33
35
  pages:
34
- example:
35
- title: example name
36
- url: http://example.com/
37
- xpath:
36
+ example1:
37
+ title: example site 1
38
+ url: http://example.com/1
39
+ selectors:
40
+ item: li
41
+ item_description: p:nth-child(3)
42
+ item_link: a
43
+ item_time: time[datetime]
44
+ item_title: p:nth-child(2)
45
+ example2:
46
+ title: example site 2
47
+ url: http://example.com/2
48
+ selectors:
38
49
  item: //li
39
50
  item_description: .//p[3]
40
51
  item_link: .//a/@href
@@ -42,21 +53,40 @@ pages:
42
53
  item_title: .//p[2]
43
54
  ```
44
55
 
45
- And then call `Weneedfeed::Capture`:
56
+ ### Build
46
57
 
47
- ```ruby
48
- require 'weneedfeed'
58
+ Run `weneedfeed build` to build static files.
59
+
60
+ ```
61
+ Usage:
62
+ weneedfeed build --base-url=BASE_URL
63
+
64
+ Options:
65
+ --base-url=BASE_URL # Base URL where to locate built files. (e.g. `"https://user.github.io/repo"`)
66
+ [--schema-path=SCHEMA_PATH] # Path to weneedfeed YAML schema file.
67
+ # Default: weneedfeed.yml
68
+
69
+ Build static files.
70
+ ```
71
+
72
+ ### Server
73
+
74
+ Run `weneedfeed server` to run HTTP server.
75
+
76
+ ```
77
+ Usage:
78
+ weneedfeed server
79
+
80
+ Options:
81
+ [--schema-path=SCHEMA_PATH] # Path to weneedfeed YAML schema file.
82
+ # Default: weneedfeed.yml
49
83
 
50
- Weneedfeed::Capture.call(
51
- base_url: 'https://user.github.io/repo',
52
- schema_path: 'schema.yml'
53
- )
84
+ Run HTTP server.
54
85
  ```
55
86
 
56
- These files will be generated:
87
+ ## GitHub Actions Integration
57
88
 
58
- - output/index.html
59
- - output/feeds/example.xml
89
+ Use [weneedfeed-action](https://github.com/r7kamura/weneedfeed-action) for invoking weneedfeed on GitHub Actions.
60
90
 
61
91
  ## Development
62
92
 
@@ -16,7 +16,7 @@ module Weneedfeed
16
16
 
17
17
  desc(
18
18
  'build',
19
- 'Build static files for feeds.'
19
+ 'Build static files.'
20
20
  )
21
21
 
22
22
  method_option(
@@ -44,7 +44,7 @@ module Weneedfeed
44
44
 
45
45
  desc(
46
46
  'server',
47
- 'Run HTTP server'
47
+ 'Run HTTP server.'
48
48
  )
49
49
 
50
50
  method_option(
@@ -20,11 +20,11 @@ module Weneedfeed
20
20
  end
21
21
 
22
22
  scraping = ::Weneedfeed::Scraping.new(
23
- item_description_xpath: properties['xpath']['item_description'],
24
- item_link_xpath: properties['xpath']['item_link'],
25
- item_time_xpath: properties['xpath']['item_time'],
26
- item_title_xpath: properties['xpath']['item_title'],
27
- item_xpath: properties['xpath']['item'],
23
+ item_description_selector: properties['selectors']['item_description'],
24
+ item_link_selector: properties['selectors']['item_link'],
25
+ item_time_selector: properties['selectors']['item_time'],
26
+ item_title_selector: properties['selectors']['item_title'],
27
+ item_selector: properties['selectors']['item'],
28
28
  title: properties['title'],
29
29
  url: properties['url']
30
30
  )
@@ -7,7 +7,9 @@ module Weneedfeed
7
7
  pages = request.env.dig(
8
8
  'weneedfeed.schema',
9
9
  'pages'
10
- )
10
+ ).sort_by do |_key, value|
11
+ value['title']
12
+ end
11
13
  response.content_type = 'text/html'
12
14
  response.write(
13
15
  ::Weneedfeed::Views::ShowTopPage.new(
@@ -15,38 +15,38 @@ module Weneedfeed
15
15
  end
16
16
  end
17
17
 
18
- # @param [String] description_xpath
19
- # @param [String] link_xpath
18
+ # @param [String] description_selector
19
+ # @param [String] link_selector
20
20
  # @param [Nokogiri::Node] node
21
- # @param [String] time_xpath
22
- # @param [String] title_xpath
21
+ # @param [String] time_selector
22
+ # @param [String] title_selector
23
23
  # @param [String] url
24
24
  def initialize(
25
- description_xpath:,
26
- link_xpath:,
25
+ description_selector:,
26
+ link_selector:,
27
27
  node:,
28
- time_xpath:,
29
- title_xpath:,
28
+ time_selector:,
29
+ title_selector:,
30
30
  url:
31
31
  )
32
- @description_xpath = description_xpath
33
- @link_xpath = link_xpath
32
+ @description_selector = description_selector
33
+ @link_selector = link_selector
34
34
  @node = node
35
- @time_xpath = time_xpath
36
- @title_xpath = title_xpath
35
+ @time_selector = time_selector
36
+ @title_selector = title_selector
37
37
  @url = url
38
38
  end
39
39
 
40
40
  # @return [String, nil]
41
41
  def description
42
- @node.xpath(@description_xpath).inner_html
42
+ @node.at(@description_selector).inner_html
43
43
  end
44
44
 
45
45
  # @return [String]
46
46
  def link
47
47
  ::URI.join(
48
48
  @url,
49
- @node.xpath(@link_xpath).inner_html
49
+ @node.at(@link_selector)['href']
50
50
  ).to_s
51
51
  end
52
52
 
@@ -57,14 +57,14 @@ module Weneedfeed
57
57
 
58
58
  # @return [String, nil]
59
59
  def title
60
- @node.xpath(@title_xpath).inner_text
60
+ @node.at(@title_selector).inner_text
61
61
  end
62
62
 
63
63
  private
64
64
 
65
65
  # @return [String]
66
66
  def time_string
67
- @node.xpath(@time_xpath).inner_html
67
+ @node.at(@time_selector).inner_html
68
68
  end
69
69
  end
70
70
  end
@@ -8,29 +8,29 @@ module Weneedfeed
8
8
  # @return [String]
9
9
  attr_reader :url
10
10
 
11
- # @param [String] item_description_xpath
12
- # @param [String] item_link_xpath
13
- # @param [String] item_time_xpath
14
- # @param [String] item_title_xpath
15
- # @param [String] item_xpath
11
+ # @param [String] item_description_selector
12
+ # @param [String] item_link_selector
13
+ # @param [String] item_time_selector
14
+ # @param [String] item_title_selector
15
+ # @param [String] item_selector
16
16
  # @param [Nokogiri::Node] node
17
17
  # @param [String] title
18
18
  # @param [String] url
19
19
  def initialize(
20
- item_description_xpath:,
21
- item_link_xpath:,
22
- item_time_xpath:,
23
- item_title_xpath:,
24
- item_xpath:,
20
+ item_description_selector:,
21
+ item_link_selector:,
22
+ item_time_selector:,
23
+ item_title_selector:,
24
+ item_selector:,
25
25
  node:,
26
26
  title:,
27
27
  url:
28
28
  )
29
- @item_description_xpath = item_description_xpath
30
- @item_link_xpath = item_link_xpath
31
- @item_time_xpath = item_time_xpath
32
- @item_title_xpath = item_title_xpath
33
- @item_xpath = item_xpath
29
+ @item_description_selector = item_description_selector
30
+ @item_link_selector = item_link_selector
31
+ @item_time_selector = item_time_selector
32
+ @item_title_selector = item_title_selector
33
+ @item_selector = item_selector
34
34
  @node = node
35
35
  @title = title
36
36
  @url = url
@@ -38,13 +38,13 @@ module Weneedfeed
38
38
 
39
39
  # @return [Array<Weneedfeed::Item>]
40
40
  def items
41
- @node.xpath(@item_xpath).map do |node|
41
+ @node.search(@item_selector).map do |node|
42
42
  ::Weneedfeed::Item.new(
43
- description_xpath: @item_description_xpath,
44
- link_xpath: @item_link_xpath,
43
+ description_selector: @item_description_selector,
44
+ link_selector: @item_link_selector,
45
45
  node: node,
46
- time_xpath: @item_time_xpath,
47
- title_xpath: @item_title_xpath,
46
+ time_selector: @item_time_selector,
47
+ title_selector: @item_title_selector,
48
48
  url: @url
49
49
  )
50
50
  end
@@ -5,27 +5,27 @@ require 'nokogiri'
5
5
 
6
6
  module Weneedfeed
7
7
  class Scraping
8
- # @param [String] item_description_xpath
9
- # @param [String] item_link_xpath
10
- # @param [String] item_time_xpath
11
- # @param [String] item_title_xpath
12
- # @param [String] item_xpath
8
+ # @param [String] item_description_selector
9
+ # @param [String] item_link_selector
10
+ # @param [String] item_time_selector
11
+ # @param [String] item_title_selector
12
+ # @param [String] item_selector
13
13
  # @param [String] title
14
14
  # @param [String] url
15
15
  def initialize(
16
- item_description_xpath:,
17
- item_link_xpath:,
18
- item_time_xpath:,
19
- item_title_xpath:,
20
- item_xpath:,
16
+ item_description_selector:,
17
+ item_link_selector:,
18
+ item_time_selector:,
19
+ item_title_selector:,
20
+ item_selector:,
21
21
  title:,
22
22
  url:
23
23
  )
24
- @item_description_xpath = item_description_xpath
25
- @item_link_xpath = item_link_xpath
26
- @item_time_xpath = item_time_xpath
27
- @item_title_xpath = item_title_xpath
28
- @item_xpath = item_xpath
24
+ @item_description_selector = item_description_selector
25
+ @item_link_selector = item_link_selector
26
+ @item_time_selector = item_time_selector
27
+ @item_title_selector = item_title_selector
28
+ @item_selector = item_selector
29
29
  @title = title
30
30
  @url = url
31
31
  end
@@ -34,11 +34,11 @@ module Weneedfeed
34
34
  def call
35
35
  ::Weneedfeed::Page.new(
36
36
  node: parsed_body,
37
- item_description_xpath: @item_description_xpath,
38
- item_xpath: @item_xpath,
39
- item_link_xpath: @item_link_xpath,
40
- item_time_xpath: @item_time_xpath,
41
- item_title_xpath: @item_title_xpath,
37
+ item_description_selector: @item_description_selector,
38
+ item_selector: @item_selector,
39
+ item_link_selector: @item_link_selector,
40
+ item_time_selector: @item_time_selector,
41
+ item_title_selector: @item_title_selector,
42
42
  title: @title,
43
43
  url: @url
44
44
  )
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Weneedfeed
4
- VERSION = '0.3.0'
4
+ VERSION = '0.4.0'
5
5
  end
@@ -11,6 +11,11 @@ module Weneedfeed
11
11
 
12
12
  private
13
13
 
14
+ # @return [String]
15
+ def top_page_path
16
+ request.path.delete_suffix(router.path(:feed, page_name: page_name))
17
+ end
18
+
14
19
  # @return [Enumerable<Weneedfeed::Item>]
15
20
  def items
16
21
  @page.items.sort_by do |item|
@@ -22,6 +27,11 @@ module Weneedfeed
22
27
  def page_name
23
28
  request.env['router.params'][:page_name]
24
29
  end
30
+
31
+ # @return [Hanami::Router]
32
+ def router
33
+ ::Weneedfeed::Application.router
34
+ end
25
35
  end
26
36
  end
27
37
  end
@@ -4,7 +4,7 @@
4
4
  xmlns:content="http://purl.org/rss/1.0/modules/content/">
5
5
  <channel>
6
6
  <title><![CDATA[<%= @page.title %>]]></title>
7
- <link><%= "#{request.base_url}#{request.path}" %></link>
7
+ <link><%= "#{request.base_url}#{top_page_path}" %></link>
8
8
  <atom:link href="<%= "#{request.base_url}#{request.path}" %>" rel="self"/>
9
9
  <description><![CDATA[Recent content on <%= @page.title %>]]></description>
10
10
  <lastBuildDate><%= Time.now.rfc822 %></lastBuildDate>
@@ -8,7 +8,7 @@ Gem::Specification.new do |spec|
8
8
  spec.authors = ['Ryo Nakamura']
9
9
  spec.email = ['r7kamura@gmail.com']
10
10
 
11
- spec.summary = 'Generate feeds from URL and XPath.'
11
+ spec.summary = 'Generate feeds from URL and selectors.'
12
12
  spec.homepage = 'https://github.com/r7kamura/weneedfeed'
13
13
  spec.license = 'MIT'
14
14
  spec.required_ruby_version = Gem::Requirement.new('>= 2.5.0')
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: weneedfeed
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ryo Nakamura
@@ -142,5 +142,5 @@ requirements: []
142
142
  rubygems_version: 3.1.2
143
143
  signing_key:
144
144
  specification_version: 4
145
- summary: Generate feeds from URL and XPath.
145
+ summary: Generate feeds from URL and selectors.
146
146
  test_files: []