webg 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4)
  1. checksums.yaml +4 -4
  2. data/exe/webg +22 -5
  3. data/lib/webg/version.rb +1 -1
  4. metadata +3 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d0da83b75f201d6de0298ee4e5476b7938a168dc854d124176b32b2de787c0a3
4
- data.tar.gz: 03e37742164d66ecdded90049451bfe53454508a3db4e4dd2fa08b53a7740c73
3
+ metadata.gz: 941940d059cf78116a42cdf6733bea9dd14a243e1fcde8773494a6eabb3c5e43
4
+ data.tar.gz: c2188442e6ab0f3cefea91d0a900259fcf254184b9fe38076a34899e4b924196
5
5
  SHA512:
6
- metadata.gz: db0bac63d91251461323b2c058dca1e8f55c787246c0bedd698051dbabf1fc41f1317e8a38a8f48674bc03aff12dfde7127764d0cc9bb1643d78a940599c5b22
7
- data.tar.gz: 9289f3b9f16f436479d64c692013369492ab6cd3a5487ad5044546e7f18269c6073195e079885480af66ee0833f736b35c5f9433e2f0988d48cad361d6888d5b
6
+ metadata.gz: 35f501258470789b85c1ea9543a611ae35d1e40ac35317fdf6fd0a139e1111ed7e7772ecba1f7fd7a7a7feef5eb27e602333cc9da1ade6ebc1bcaea051c357df
7
+ data.tar.gz: cfb7b398c2c399b79bac1d7ffa22b15f102da564a94c22a9fabd7bbe98af41926e3c9e5b6dc648feef2eb06faea778c99d22bf1c84b6d8082942616f5b8abc0e
data/exe/webg CHANGED
@@ -7,6 +7,14 @@ require "nokogiri"
7
7
 
8
8
  require "webg/version"
9
9
 
10
+ module Squeezable
11
+ def squeezed_text(document)
12
+ return text(document).each_line.map { |l|
13
+ l.gsub(/\p{Space}+/, " ").strip
14
+ }.join("\n").strip.gsub(/\n{3,}/, "\n\n")
15
+ end
16
+ end
17
+
10
18
  module Fetcher
11
19
  end
12
20
 
@@ -41,6 +49,8 @@ module Selector
41
49
  end
42
50
 
43
51
  class Selector::All
52
+ include Squeezable
53
+
44
54
  REJECT_TAG_NAMES = %w[script noscript style]
45
55
 
46
56
  def raw(document)
@@ -54,6 +64,8 @@ class Selector::All
54
64
  end
55
65
 
56
66
  class Selector::Css
67
+ include Squeezable
68
+
57
69
  def initialize(css_selectors)
58
70
  @css_selectors = css_selectors
59
71
  end
@@ -78,7 +90,7 @@ def parse_options(argv)
78
90
  fetcher = :raw
79
91
  headers = {}
80
92
  css_selectors = []
81
- text = false
93
+ output_method_name = :raw
82
94
 
83
95
  parser = OptionParser.new
84
96
  parser.version = Webg::VERSION
@@ -121,7 +133,13 @@ def parse_options(argv)
121
133
  "--text",
122
134
  "output only text",
123
135
  ) do
124
- text = true
136
+ output_method_name = :text
137
+ end
138
+ parser.on(
139
+ "--squeezed-text",
140
+ "output text with squeezing white spaces",
141
+ ) do
142
+ output_method_name = :squeezed_text
125
143
  end
126
144
  parser.parse!(argv)
127
145
 
@@ -132,15 +150,14 @@ def parse_options(argv)
132
150
  end
133
151
  uri = URI(uri)
134
152
 
135
- return uri, fetcher, headers, css_selectors, text
153
+ return uri, fetcher, headers, css_selectors, output_method_name
136
154
  end
137
155
 
138
156
  begin
139
- uri, fetcher_name, headers, css_selectors, text = parse_options(ARGV)
157
+ uri, fetcher_name, headers, css_selectors, output_method_name = parse_options(ARGV)
140
158
 
141
159
  fetcher = Fetcher.const_get(fetcher_name.capitalize).new(headers)
142
160
  selector = css_selectors.empty? ? Selector::All.new : Selector::Css.new(css_selectors)
143
- output_method_name = text ? :text : :raw
144
161
 
145
162
  document = Nokogiri::HTML.parse(fetcher.(uri))
146
163
  puts(selector.public_send(output_method_name, document))
data/lib/webg/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Webg
4
- VERSION = "0.2.0"
4
+ VERSION = "0.3.0"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: webg
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yuya.Nishida.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-05-08 00:00:00.000000000 Z
11
+ date: 2022-07-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: capybara
@@ -77,7 +77,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
77
77
  - !ruby/object:Gem::Version
78
78
  version: '0'
79
79
  requirements: []
80
- rubygems_version: 3.2.15
80
+ rubygems_version: 3.3.7
81
81
  signing_key:
82
82
  specification_version: 4
83
83
  summary: 'webg: A downloader to get web page with JavaScript'