webg 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/exe/webg +22 -5
- data/lib/webg/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 941940d059cf78116a42cdf6733bea9dd14a243e1fcde8773494a6eabb3c5e43
|
4
|
+
data.tar.gz: c2188442e6ab0f3cefea91d0a900259fcf254184b9fe38076a34899e4b924196
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 35f501258470789b85c1ea9543a611ae35d1e40ac35317fdf6fd0a139e1111ed7e7772ecba1f7fd7a7a7feef5eb27e602333cc9da1ade6ebc1bcaea051c357df
|
7
|
+
data.tar.gz: cfb7b398c2c399b79bac1d7ffa22b15f102da564a94c22a9fabd7bbe98af41926e3c9e5b6dc648feef2eb06faea778c99d22bf1c84b6d8082942616f5b8abc0e
|
data/exe/webg
CHANGED
@@ -7,6 +7,14 @@ require "nokogiri"
|
|
7
7
|
|
8
8
|
require "webg/version"
|
9
9
|
|
10
|
+
module Squeezable
|
11
|
+
def squeezed_text(document)
|
12
|
+
return text(document).each_line.map { |l|
|
13
|
+
l.gsub(/\p{Space}+/, " ").strip
|
14
|
+
}.join("\n").strip.gsub(/\n{3,}/, "\n\n")
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
10
18
|
module Fetcher
|
11
19
|
end
|
12
20
|
|
@@ -41,6 +49,8 @@ module Selector
|
|
41
49
|
end
|
42
50
|
|
43
51
|
class Selector::All
|
52
|
+
include Squeezable
|
53
|
+
|
44
54
|
REJECT_TAG_NAMES = %w[script noscript style]
|
45
55
|
|
46
56
|
def raw(document)
|
@@ -54,6 +64,8 @@ class Selector::All
|
|
54
64
|
end
|
55
65
|
|
56
66
|
class Selector::Css
|
67
|
+
include Squeezable
|
68
|
+
|
57
69
|
def initialize(css_selectors)
|
58
70
|
@css_selectors = css_selectors
|
59
71
|
end
|
@@ -78,7 +90,7 @@ def parse_options(argv)
|
|
78
90
|
fetcher = :raw
|
79
91
|
headers = {}
|
80
92
|
css_selectors = []
|
81
|
-
|
93
|
+
output_method_name = :raw
|
82
94
|
|
83
95
|
parser = OptionParser.new
|
84
96
|
parser.version = Webg::VERSION
|
@@ -121,7 +133,13 @@ def parse_options(argv)
|
|
121
133
|
"--text",
|
122
134
|
"output only text",
|
123
135
|
) do
|
124
|
-
|
136
|
+
output_method_name = :text
|
137
|
+
end
|
138
|
+
parser.on(
|
139
|
+
"--squeezed-text",
|
140
|
+
"output text with squeezing white spaces",
|
141
|
+
) do
|
142
|
+
output_method_name = :squeezed_text
|
125
143
|
end
|
126
144
|
parser.parse!(argv)
|
127
145
|
|
@@ -132,15 +150,14 @@ def parse_options(argv)
|
|
132
150
|
end
|
133
151
|
uri = URI(uri)
|
134
152
|
|
135
|
-
return uri, fetcher, headers, css_selectors, text
|
153
|
+
return uri, fetcher, headers, css_selectors, output_method_name
|
136
154
|
end
|
137
155
|
|
138
156
|
begin
|
139
|
-
uri, fetcher_name, headers, css_selectors, text = parse_options(ARGV)
|
157
|
+
uri, fetcher_name, headers, css_selectors, output_method_name = parse_options(ARGV)
|
140
158
|
|
141
159
|
fetcher = Fetcher.const_get(fetcher_name.capitalize).new(headers)
|
142
160
|
selector = css_selectors.empty? ? Selector::All.new : Selector::Css.new(css_selectors)
|
143
|
-
output_method_name = text ? :text : :raw
|
144
161
|
|
145
162
|
document = Nokogiri::HTML.parse(fetcher.(uri))
|
146
163
|
puts(selector.public_send(output_method_name, document))
|
data/lib/webg/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: webg
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yuya.Nishida.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-07-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: capybara
|
@@ -77,7 +77,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
77
77
|
- !ruby/object:Gem::Version
|
78
78
|
version: '0'
|
79
79
|
requirements: []
|
80
|
-
rubygems_version: 3.
|
80
|
+
rubygems_version: 3.3.7
|
81
81
|
signing_key:
|
82
82
|
specification_version: 4
|
83
83
|
summary: 'webg: A downloader to get web page with JavaScript'
|