webg 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4)
  1. checksums.yaml +4 -4
  2. data/exe/webg +36 -10
  3. data/lib/webg/version.rb +1 -1
  4. metadata +3 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d0da83b75f201d6de0298ee4e5476b7938a168dc854d124176b32b2de787c0a3
4
- data.tar.gz: 03e37742164d66ecdded90049451bfe53454508a3db4e4dd2fa08b53a7740c73
3
+ metadata.gz: 49b6394ac54ba2458a1271cb219a41f303b19d69444c79e621510d45a238e7d4
4
+ data.tar.gz: b58f95968573e4a3a03934c51038cda94f897f09175e12348e3eb86ff0ca5ce6
5
5
  SHA512:
6
- metadata.gz: db0bac63d91251461323b2c058dca1e8f55c787246c0bedd698051dbabf1fc41f1317e8a38a8f48674bc03aff12dfde7127764d0cc9bb1643d78a940599c5b22
7
- data.tar.gz: 9289f3b9f16f436479d64c692013369492ab6cd3a5487ad5044546e7f18269c6073195e079885480af66ee0833f736b35c5f9433e2f0988d48cad361d6888d5b
6
+ metadata.gz: 2cebf09aab37a31242790cd2f4dc2b8949611182979a4d0cf8f6884755b91818d5586892e0108e1a290bcc701be5b0322cb2e5dec84a12074f1e869099c88241
7
+ data.tar.gz: 2778038f804c561e97c9c8ce8d2e87cd9bb71f8f0b8b4d4fba5caa4b77ae26252b9653a91554ceea57c2c955ec38fb863fe7b275e1d53aecea84b314c537f062
data/exe/webg CHANGED
@@ -7,6 +7,14 @@ require "nokogiri"
7
7
 
8
8
  require "webg/version"
9
9
 
10
+ module Squeezable
11
+ def squeezed_text(document)
12
+ return text(document).each_line.map { |l|
13
+ l.gsub(/\p{Space}+/, " ").strip
14
+ }.join("\n").strip.gsub(/\n{3,}/, "\n\n")
15
+ end
16
+ end
17
+
10
18
  module Fetcher
11
19
  end
12
20
 
@@ -23,6 +31,15 @@ class Fetcher::Raw
23
31
  end
24
32
  end
25
33
 
34
+ class Fetcher::Stdin
35
+ def initialize(_headers)
36
+ end
37
+
38
+ def call(_uri)
39
+ return $stdin
40
+ end
41
+ end
42
+
26
43
  class Fetcher::Firefox
27
44
  def initialize(headers)
28
45
  return if headers.empty?
@@ -41,6 +58,8 @@ module Selector
41
58
  end
42
59
 
43
60
  class Selector::All
61
+ include Squeezable
62
+
44
63
  REJECT_TAG_NAMES = %w[script noscript style]
45
64
 
46
65
  def raw(document)
@@ -54,6 +73,8 @@ class Selector::All
54
73
  end
55
74
 
56
75
  class Selector::Css
76
+ include Squeezable
77
+
57
78
  def initialize(css_selectors)
58
79
  @css_selectors = css_selectors
59
80
  end
@@ -78,7 +99,7 @@ def parse_options(argv)
78
99
  fetcher = :raw
79
100
  headers = {}
80
101
  css_selectors = []
81
- text = false
102
+ output_method_name = :raw
82
103
 
83
104
  parser = OptionParser.new
84
105
  parser.version = Webg::VERSION
@@ -121,26 +142,31 @@ def parse_options(argv)
121
142
  "--text",
122
143
  "output only text",
123
144
  ) do
124
- text = true
145
+ output_method_name = :text
146
+ end
147
+ parser.on(
148
+ "--squeezed-text",
149
+ "output text with squeezing white spaces",
150
+ ) do
151
+ output_method_name = :squeezed_text
125
152
  end
126
153
  parser.parse!(argv)
127
154
 
128
- uri = argv.shift
129
- if !uri
130
- $stderr.puts(parser.help)
131
- exit(1)
155
+ s_uri = argv.shift
156
+ if s_uri
157
+ uri = URI(s_uri)
158
+ else
159
+ fetcher = :stdin
132
160
  end
133
- uri = URI(uri)
134
161
 
135
- return uri, fetcher, headers, css_selectors, text
162
+ return uri, fetcher, headers, css_selectors, output_method_name
136
163
  end
137
164
 
138
165
  begin
139
- uri, fetcher_name, headers, css_selectors, text = parse_options(ARGV)
166
+ uri, fetcher_name, headers, css_selectors, output_method_name = parse_options(ARGV)
140
167
 
141
168
  fetcher = Fetcher.const_get(fetcher_name.capitalize).new(headers)
142
169
  selector = css_selectors.empty? ? Selector::All.new : Selector::Css.new(css_selectors)
143
- output_method_name = text ? :text : :raw
144
170
 
145
171
  document = Nokogiri::HTML.parse(fetcher.(uri))
146
172
  puts(selector.public_send(output_method_name, document))
data/lib/webg/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Webg
4
- VERSION = "0.2.0"
4
+ VERSION = "0.4.0"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: webg
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yuya.Nishida.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-05-08 00:00:00.000000000 Z
11
+ date: 2023-11-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: capybara
@@ -77,7 +77,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
77
77
  - !ruby/object:Gem::Version
78
78
  version: '0'
79
79
  requirements: []
80
- rubygems_version: 3.2.15
80
+ rubygems_version: 3.4.10
81
81
  signing_key:
82
82
  specification_version: 4
83
83
  summary: 'webg: A downloader to get web page with JavaScript'