webg 0.2.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4)
  1. checksums.yaml +4 -4
  2. data/exe/webg +36 -10
  3. data/lib/webg/version.rb +1 -1
  4. metadata +3 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d0da83b75f201d6de0298ee4e5476b7938a168dc854d124176b32b2de787c0a3
4
- data.tar.gz: 03e37742164d66ecdded90049451bfe53454508a3db4e4dd2fa08b53a7740c73
3
+ metadata.gz: 49b6394ac54ba2458a1271cb219a41f303b19d69444c79e621510d45a238e7d4
4
+ data.tar.gz: b58f95968573e4a3a03934c51038cda94f897f09175e12348e3eb86ff0ca5ce6
5
5
  SHA512:
6
- metadata.gz: db0bac63d91251461323b2c058dca1e8f55c787246c0bedd698051dbabf1fc41f1317e8a38a8f48674bc03aff12dfde7127764d0cc9bb1643d78a940599c5b22
7
- data.tar.gz: 9289f3b9f16f436479d64c692013369492ab6cd3a5487ad5044546e7f18269c6073195e079885480af66ee0833f736b35c5f9433e2f0988d48cad361d6888d5b
6
+ metadata.gz: 2cebf09aab37a31242790cd2f4dc2b8949611182979a4d0cf8f6884755b91818d5586892e0108e1a290bcc701be5b0322cb2e5dec84a12074f1e869099c88241
7
+ data.tar.gz: 2778038f804c561e97c9c8ce8d2e87cd9bb71f8f0b8b4d4fba5caa4b77ae26252b9653a91554ceea57c2c955ec38fb863fe7b275e1d53aecea84b314c537f062
data/exe/webg CHANGED
@@ -7,6 +7,14 @@ require "nokogiri"
7
7
 
8
8
  require "webg/version"
9
9
 
10
+ module Squeezable
11
+ def squeezed_text(document)
12
+ return text(document).each_line.map { |l|
13
+ l.gsub(/\p{Space}+/, " ").strip
14
+ }.join("\n").strip.gsub(/\n{3,}/, "\n\n")
15
+ end
16
+ end
17
+
10
18
  module Fetcher
11
19
  end
12
20
 
@@ -23,6 +31,15 @@ class Fetcher::Raw
23
31
  end
24
32
  end
25
33
 
34
+ class Fetcher::Stdin
35
+ def initialize(_headers)
36
+ end
37
+
38
+ def call(_uri)
39
+ return $stdin
40
+ end
41
+ end
42
+
26
43
  class Fetcher::Firefox
27
44
  def initialize(headers)
28
45
  return if headers.empty?
@@ -41,6 +58,8 @@ module Selector
41
58
  end
42
59
 
43
60
  class Selector::All
61
+ include Squeezable
62
+
44
63
  REJECT_TAG_NAMES = %w[script noscript style]
45
64
 
46
65
  def raw(document)
@@ -54,6 +73,8 @@ class Selector::All
54
73
  end
55
74
 
56
75
  class Selector::Css
76
+ include Squeezable
77
+
57
78
  def initialize(css_selectors)
58
79
  @css_selectors = css_selectors
59
80
  end
@@ -78,7 +99,7 @@ def parse_options(argv)
78
99
  fetcher = :raw
79
100
  headers = {}
80
101
  css_selectors = []
81
- text = false
102
+ output_method_name = :raw
82
103
 
83
104
  parser = OptionParser.new
84
105
  parser.version = Webg::VERSION
@@ -121,26 +142,31 @@ def parse_options(argv)
121
142
  "--text",
122
143
  "output only text",
123
144
  ) do
124
- text = true
145
+ output_method_name = :text
146
+ end
147
+ parser.on(
148
+ "--squeezed-text",
149
+ "output text with squeezing white spaces",
150
+ ) do
151
+ output_method_name = :squeezed_text
125
152
  end
126
153
  parser.parse!(argv)
127
154
 
128
- uri = argv.shift
129
- if !uri
130
- $stderr.puts(parser.help)
131
- exit(1)
155
+ s_uri = argv.shift
156
+ if s_uri
157
+ uri = URI(s_uri)
158
+ else
159
+ fetcher = :stdin
132
160
  end
133
- uri = URI(uri)
134
161
 
135
- return uri, fetcher, headers, css_selectors, text
162
+ return uri, fetcher, headers, css_selectors, output_method_name
136
163
  end
137
164
 
138
165
  begin
139
- uri, fetcher_name, headers, css_selectors, text = parse_options(ARGV)
166
+ uri, fetcher_name, headers, css_selectors, output_method_name = parse_options(ARGV)
140
167
 
141
168
  fetcher = Fetcher.const_get(fetcher_name.capitalize).new(headers)
142
169
  selector = css_selectors.empty? ? Selector::All.new : Selector::Css.new(css_selectors)
143
- output_method_name = text ? :text : :raw
144
170
 
145
171
  document = Nokogiri::HTML.parse(fetcher.(uri))
146
172
  puts(selector.public_send(output_method_name, document))
data/lib/webg/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Webg
4
- VERSION = "0.2.0"
4
+ VERSION = "0.4.0"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: webg
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yuya.Nishida.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-05-08 00:00:00.000000000 Z
11
+ date: 2023-11-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: capybara
@@ -77,7 +77,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
77
77
  - !ruby/object:Gem::Version
78
78
  version: '0'
79
79
  requirements: []
80
- rubygems_version: 3.2.15
80
+ rubygems_version: 3.4.10
81
81
  signing_key:
82
82
  specification_version: 4
83
83
  summary: 'webg: A downloader to get web page with JavaScript'