webg 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/exe/webg +22 -5
- data/lib/webg/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 941940d059cf78116a42cdf6733bea9dd14a243e1fcde8773494a6eabb3c5e43
|
4
|
+
data.tar.gz: c2188442e6ab0f3cefea91d0a900259fcf254184b9fe38076a34899e4b924196
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 35f501258470789b85c1ea9543a611ae35d1e40ac35317fdf6fd0a139e1111ed7e7772ecba1f7fd7a7a7feef5eb27e602333cc9da1ade6ebc1bcaea051c357df
|
7
|
+
data.tar.gz: cfb7b398c2c399b79bac1d7ffa22b15f102da564a94c22a9fabd7bbe98af41926e3c9e5b6dc648feef2eb06faea778c99d22bf1c84b6d8082942616f5b8abc0e
|
data/exe/webg
CHANGED
@@ -7,6 +7,14 @@ require "nokogiri"
|
|
7
7
|
|
8
8
|
require "webg/version"
|
9
9
|
|
10
|
+
module Squeezable
|
11
|
+
def squeezed_text(document)
|
12
|
+
return text(document).each_line.map { |l|
|
13
|
+
l.gsub(/\p{Space}+/, " ").strip
|
14
|
+
}.join("\n").strip.gsub(/\n{3,}/, "\n\n")
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
10
18
|
module Fetcher
|
11
19
|
end
|
12
20
|
|
@@ -41,6 +49,8 @@ module Selector
|
|
41
49
|
end
|
42
50
|
|
43
51
|
class Selector::All
|
52
|
+
include Squeezable
|
53
|
+
|
44
54
|
REJECT_TAG_NAMES = %w[script noscript style]
|
45
55
|
|
46
56
|
def raw(document)
|
@@ -54,6 +64,8 @@ class Selector::All
|
|
54
64
|
end
|
55
65
|
|
56
66
|
class Selector::Css
|
67
|
+
include Squeezable
|
68
|
+
|
57
69
|
def initialize(css_selectors)
|
58
70
|
@css_selectors = css_selectors
|
59
71
|
end
|
@@ -78,7 +90,7 @@ def parse_options(argv)
|
|
78
90
|
fetcher = :raw
|
79
91
|
headers = {}
|
80
92
|
css_selectors = []
|
81
|
-
|
93
|
+
output_method_name = :raw
|
82
94
|
|
83
95
|
parser = OptionParser.new
|
84
96
|
parser.version = Webg::VERSION
|
@@ -121,7 +133,13 @@ def parse_options(argv)
|
|
121
133
|
"--text",
|
122
134
|
"output only text",
|
123
135
|
) do
|
124
|
-
|
136
|
+
output_method_name = :text
|
137
|
+
end
|
138
|
+
parser.on(
|
139
|
+
"--squeezed-text",
|
140
|
+
"output text with squeezing white spaces",
|
141
|
+
) do
|
142
|
+
output_method_name = :squeezed_text
|
125
143
|
end
|
126
144
|
parser.parse!(argv)
|
127
145
|
|
@@ -132,15 +150,14 @@ def parse_options(argv)
|
|
132
150
|
end
|
133
151
|
uri = URI(uri)
|
134
152
|
|
135
|
-
return uri, fetcher, headers, css_selectors, text
|
153
|
+
return uri, fetcher, headers, css_selectors, output_method_name
|
136
154
|
end
|
137
155
|
|
138
156
|
begin
|
139
|
-
uri, fetcher_name, headers, css_selectors, text = parse_options(ARGV)
|
157
|
+
uri, fetcher_name, headers, css_selectors, output_method_name = parse_options(ARGV)
|
140
158
|
|
141
159
|
fetcher = Fetcher.const_get(fetcher_name.capitalize).new(headers)
|
142
160
|
selector = css_selectors.empty? ? Selector::All.new : Selector::Css.new(css_selectors)
|
143
|
-
output_method_name = text ? :text : :raw
|
144
161
|
|
145
162
|
document = Nokogiri::HTML.parse(fetcher.(uri))
|
146
163
|
puts(selector.public_send(output_method_name, document))
|
data/lib/webg/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: webg
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yuya.Nishida.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-07-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: capybara
|
@@ -77,7 +77,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
77
77
|
- !ruby/object:Gem::Version
|
78
78
|
version: '0'
|
79
79
|
requirements: []
|
80
|
-
rubygems_version: 3.
|
80
|
+
rubygems_version: 3.3.7
|
81
81
|
signing_key:
|
82
82
|
specification_version: 4
|
83
83
|
summary: 'webg: A downloader to get web page with JavaScript'
|