webg 0.2.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/exe/webg +36 -10
- data/lib/webg/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 49b6394ac54ba2458a1271cb219a41f303b19d69444c79e621510d45a238e7d4
|
4
|
+
data.tar.gz: b58f95968573e4a3a03934c51038cda94f897f09175e12348e3eb86ff0ca5ce6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2cebf09aab37a31242790cd2f4dc2b8949611182979a4d0cf8f6884755b91818d5586892e0108e1a290bcc701be5b0322cb2e5dec84a12074f1e869099c88241
|
7
|
+
data.tar.gz: 2778038f804c561e97c9c8ce8d2e87cd9bb71f8f0b8b4d4fba5caa4b77ae26252b9653a91554ceea57c2c955ec38fb863fe7b275e1d53aecea84b314c537f062
|
data/exe/webg
CHANGED
@@ -7,6 +7,14 @@ require "nokogiri"
|
|
7
7
|
|
8
8
|
require "webg/version"
|
9
9
|
|
10
|
+
module Squeezable
|
11
|
+
def squeezed_text(document)
|
12
|
+
return text(document).each_line.map { |l|
|
13
|
+
l.gsub(/\p{Space}+/, " ").strip
|
14
|
+
}.join("\n").strip.gsub(/\n{3,}/, "\n\n")
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
10
18
|
module Fetcher
|
11
19
|
end
|
12
20
|
|
@@ -23,6 +31,15 @@ class Fetcher::Raw
|
|
23
31
|
end
|
24
32
|
end
|
25
33
|
|
34
|
+
class Fetcher::Stdin
|
35
|
+
def initialize(_headers)
|
36
|
+
end
|
37
|
+
|
38
|
+
def call(_uri)
|
39
|
+
return $stdin
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
26
43
|
class Fetcher::Firefox
|
27
44
|
def initialize(headers)
|
28
45
|
return if headers.empty?
|
@@ -41,6 +58,8 @@ module Selector
|
|
41
58
|
end
|
42
59
|
|
43
60
|
class Selector::All
|
61
|
+
include Squeezable
|
62
|
+
|
44
63
|
REJECT_TAG_NAMES = %w[script noscript style]
|
45
64
|
|
46
65
|
def raw(document)
|
@@ -54,6 +73,8 @@ class Selector::All
|
|
54
73
|
end
|
55
74
|
|
56
75
|
class Selector::Css
|
76
|
+
include Squeezable
|
77
|
+
|
57
78
|
def initialize(css_selectors)
|
58
79
|
@css_selectors = css_selectors
|
59
80
|
end
|
@@ -78,7 +99,7 @@ def parse_options(argv)
|
|
78
99
|
fetcher = :raw
|
79
100
|
headers = {}
|
80
101
|
css_selectors = []
|
81
|
-
|
102
|
+
output_method_name = :raw
|
82
103
|
|
83
104
|
parser = OptionParser.new
|
84
105
|
parser.version = Webg::VERSION
|
@@ -121,26 +142,31 @@ def parse_options(argv)
|
|
121
142
|
"--text",
|
122
143
|
"output only text",
|
123
144
|
) do
|
124
|
-
|
145
|
+
output_method_name = :text
|
146
|
+
end
|
147
|
+
parser.on(
|
148
|
+
"--squeezed-text",
|
149
|
+
"output text with squeezing white spaces",
|
150
|
+
) do
|
151
|
+
output_method_name = :squeezed_text
|
125
152
|
end
|
126
153
|
parser.parse!(argv)
|
127
154
|
|
128
|
-
|
129
|
-
if
|
130
|
-
|
131
|
-
|
155
|
+
s_uri = argv.shift
|
156
|
+
if s_uri
|
157
|
+
uri = URI(s_uri)
|
158
|
+
else
|
159
|
+
fetcher = :stdin
|
132
160
|
end
|
133
|
-
uri = URI(uri)
|
134
161
|
|
135
|
-
return uri, fetcher, headers, css_selectors,
|
162
|
+
return uri, fetcher, headers, css_selectors, output_method_name
|
136
163
|
end
|
137
164
|
|
138
165
|
begin
|
139
|
-
uri, fetcher_name, headers, css_selectors,
|
166
|
+
uri, fetcher_name, headers, css_selectors, output_method_name = parse_options(ARGV)
|
140
167
|
|
141
168
|
fetcher = Fetcher.const_get(fetcher_name.capitalize).new(headers)
|
142
169
|
selector = css_selectors.empty? ? Selector::All.new : Selector::Css.new(css_selectors)
|
143
|
-
output_method_name = text ? :text : :raw
|
144
170
|
|
145
171
|
document = Nokogiri::HTML.parse(fetcher.(uri))
|
146
172
|
puts(selector.public_send(output_method_name, document))
|
data/lib/webg/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: webg
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yuya.Nishida.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-11-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: capybara
|
@@ -77,7 +77,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
77
77
|
- !ruby/object:Gem::Version
|
78
78
|
version: '0'
|
79
79
|
requirements: []
|
80
|
-
rubygems_version: 3.
|
80
|
+
rubygems_version: 3.4.10
|
81
81
|
signing_key:
|
82
82
|
specification_version: 4
|
83
83
|
summary: 'webg: A downloader to get web page with JavaScript'
|