webg 0.2.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/exe/webg +36 -10
- data/lib/webg/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 49b6394ac54ba2458a1271cb219a41f303b19d69444c79e621510d45a238e7d4
|
4
|
+
data.tar.gz: b58f95968573e4a3a03934c51038cda94f897f09175e12348e3eb86ff0ca5ce6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2cebf09aab37a31242790cd2f4dc2b8949611182979a4d0cf8f6884755b91818d5586892e0108e1a290bcc701be5b0322cb2e5dec84a12074f1e869099c88241
|
7
|
+
data.tar.gz: 2778038f804c561e97c9c8ce8d2e87cd9bb71f8f0b8b4d4fba5caa4b77ae26252b9653a91554ceea57c2c955ec38fb863fe7b275e1d53aecea84b314c537f062
|
data/exe/webg
CHANGED
@@ -7,6 +7,14 @@ require "nokogiri"
|
|
7
7
|
|
8
8
|
require "webg/version"
|
9
9
|
|
10
|
+
module Squeezable
|
11
|
+
def squeezed_text(document)
|
12
|
+
return text(document).each_line.map { |l|
|
13
|
+
l.gsub(/\p{Space}+/, " ").strip
|
14
|
+
}.join("\n").strip.gsub(/\n{3,}/, "\n\n")
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
10
18
|
module Fetcher
|
11
19
|
end
|
12
20
|
|
@@ -23,6 +31,15 @@ class Fetcher::Raw
|
|
23
31
|
end
|
24
32
|
end
|
25
33
|
|
34
|
+
class Fetcher::Stdin
|
35
|
+
def initialize(_headers)
|
36
|
+
end
|
37
|
+
|
38
|
+
def call(_uri)
|
39
|
+
return $stdin
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
26
43
|
class Fetcher::Firefox
|
27
44
|
def initialize(headers)
|
28
45
|
return if headers.empty?
|
@@ -41,6 +58,8 @@ module Selector
|
|
41
58
|
end
|
42
59
|
|
43
60
|
class Selector::All
|
61
|
+
include Squeezable
|
62
|
+
|
44
63
|
REJECT_TAG_NAMES = %w[script noscript style]
|
45
64
|
|
46
65
|
def raw(document)
|
@@ -54,6 +73,8 @@ class Selector::All
|
|
54
73
|
end
|
55
74
|
|
56
75
|
class Selector::Css
|
76
|
+
include Squeezable
|
77
|
+
|
57
78
|
def initialize(css_selectors)
|
58
79
|
@css_selectors = css_selectors
|
59
80
|
end
|
@@ -78,7 +99,7 @@ def parse_options(argv)
|
|
78
99
|
fetcher = :raw
|
79
100
|
headers = {}
|
80
101
|
css_selectors = []
|
81
|
-
|
102
|
+
output_method_name = :raw
|
82
103
|
|
83
104
|
parser = OptionParser.new
|
84
105
|
parser.version = Webg::VERSION
|
@@ -121,26 +142,31 @@ def parse_options(argv)
|
|
121
142
|
"--text",
|
122
143
|
"output only text",
|
123
144
|
) do
|
124
|
-
|
145
|
+
output_method_name = :text
|
146
|
+
end
|
147
|
+
parser.on(
|
148
|
+
"--squeezed-text",
|
149
|
+
"output text with squeezing white spaces",
|
150
|
+
) do
|
151
|
+
output_method_name = :squeezed_text
|
125
152
|
end
|
126
153
|
parser.parse!(argv)
|
127
154
|
|
128
|
-
|
129
|
-
if
|
130
|
-
|
131
|
-
|
155
|
+
s_uri = argv.shift
|
156
|
+
if s_uri
|
157
|
+
uri = URI(s_uri)
|
158
|
+
else
|
159
|
+
fetcher = :stdin
|
132
160
|
end
|
133
|
-
uri = URI(uri)
|
134
161
|
|
135
|
-
return uri, fetcher, headers, css_selectors, text
|
162
|
+
return uri, fetcher, headers, css_selectors, output_method_name
|
136
163
|
end
|
137
164
|
|
138
165
|
begin
|
139
|
-
uri, fetcher_name, headers, css_selectors, text = parse_options(ARGV)
|
166
|
+
uri, fetcher_name, headers, css_selectors, output_method_name = parse_options(ARGV)
|
140
167
|
|
141
168
|
fetcher = Fetcher.const_get(fetcher_name.capitalize).new(headers)
|
142
169
|
selector = css_selectors.empty? ? Selector::All.new : Selector::Css.new(css_selectors)
|
143
|
-
output_method_name = text ? :text : :raw
|
144
170
|
|
145
171
|
document = Nokogiri::HTML.parse(fetcher.(uri))
|
146
172
|
puts(selector.public_send(output_method_name, document))
|
data/lib/webg/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: webg
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yuya.Nishida.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-11-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: capybara
|
@@ -77,7 +77,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
77
77
|
- !ruby/object:Gem::Version
|
78
78
|
version: '0'
|
79
79
|
requirements: []
|
80
|
-
rubygems_version: 3.
|
80
|
+
rubygems_version: 3.4.10
|
81
81
|
signing_key:
|
82
82
|
specification_version: 4
|
83
83
|
summary: 'webg: A downloader to get web page with JavaScript'
|