webg 0.0.0 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/exe/webg +43 -19
- data/lib/webg/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7bdc7060ad44a00a15487e6905f336ecfe2f144940e06253d0073c183236bffd
|
4
|
+
data.tar.gz: 897236c858cc3fa6fee45d83677213318a6d11610646fac6214e1cf07e2af553
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: babb391559cd6b4e67fd86ca7c172b4ed341c378f4a5a3b4d3b3ba569036c48fcafc65143b17990b472aafdca7d11fe680b18fa7825bd4a023c2392dbbcee8ca
|
7
|
+
data.tar.gz: a99d118c3041fac293b5efcfb76643521c81a63cb827d489686abe49cb5b2cd068d1cbe54ad938663d6b208beaf95d5e59740bbce468b23e642c59e9d9193422
|
data/exe/webg
CHANGED
@@ -3,36 +3,51 @@
|
|
3
3
|
require "optparse"
|
4
4
|
require "uri"
|
5
5
|
|
6
|
-
require "
|
6
|
+
require "nokogiri"
|
7
7
|
|
8
8
|
require "webg/version"
|
9
9
|
|
10
|
-
|
11
|
-
|
12
|
-
raise NotImplementedError, "sub class must override."
|
13
|
-
end
|
10
|
+
module Fetcher
|
11
|
+
end
|
14
12
|
|
15
|
-
|
16
|
-
|
13
|
+
class Fetcher::Raw
|
14
|
+
def call(uri)
|
15
|
+
require("open-uri")
|
16
|
+
return uri.read
|
17
17
|
end
|
18
18
|
end
|
19
19
|
|
20
|
-
class
|
21
|
-
def
|
20
|
+
class Fetcher::Firefox
|
21
|
+
def call(uri)
|
22
|
+
require("capybara")
|
23
|
+
session = Capybara::Session.new(:selenium_headless)
|
24
|
+
session.visit(uri)
|
22
25
|
return session.body
|
23
26
|
end
|
27
|
+
end
|
24
28
|
|
25
|
-
|
26
|
-
|
29
|
+
module Selector
|
30
|
+
end
|
31
|
+
|
32
|
+
class Selector::All
|
33
|
+
REJECT_TAG_NAMES = %w[script noscript style]
|
34
|
+
|
35
|
+
def raw(document)
|
36
|
+
return document.to_s
|
37
|
+
end
|
38
|
+
|
39
|
+
def text(document)
|
40
|
+
document.css(REJECT_TAG_NAMES.join(", ")).remove
|
41
|
+
return document.inner_text
|
27
42
|
end
|
28
43
|
end
|
29
44
|
|
30
|
-
class Selector::Css
|
45
|
+
class Selector::Css
|
31
46
|
def initialize(css_selectors)
|
32
47
|
@css_selectors = css_selectors
|
33
48
|
end
|
34
49
|
|
35
|
-
def raw(
|
50
|
+
def raw(document)
|
36
51
|
return nodes(session).map { |node|
|
37
52
|
node.evaluate_script('this.outerHTML')
|
38
53
|
}.join("\n")
|
@@ -51,14 +66,21 @@ end
|
|
51
66
|
|
52
67
|
def parse_options(argv)
|
53
68
|
argv = argv.dup
|
54
|
-
|
69
|
+
fetcher = :raw
|
55
70
|
css_selectors = []
|
71
|
+
text = false
|
56
72
|
|
57
73
|
parser = OptionParser.new
|
58
74
|
parser.version = Webg::VERSION
|
59
75
|
parser.banner = "Usage: #{File.basename(Process.argv0)} [options] uri"
|
60
76
|
parser.separator("")
|
61
77
|
parser.separator("Options:")
|
78
|
+
parser.on(
|
79
|
+
"--firefox",
|
80
|
+
"use Firefox to fetch web page. slow, but JavaScript-evaluatable",
|
81
|
+
) do
|
82
|
+
fetcher = :firefox
|
83
|
+
end
|
62
84
|
parser.on(
|
63
85
|
"--css-selector=SELECTOR",
|
64
86
|
"specify css selector to filter output.",
|
@@ -80,16 +102,18 @@ def parse_options(argv)
|
|
80
102
|
end
|
81
103
|
uri = URI(uri)
|
82
104
|
|
83
|
-
return uri,
|
105
|
+
return uri, fetcher, css_selectors, text
|
84
106
|
end
|
85
107
|
|
86
108
|
begin
|
87
|
-
uri,
|
88
|
-
|
89
|
-
|
109
|
+
uri, fetcher_name, css_selectors, text = parse_options(ARGV)
|
110
|
+
|
111
|
+
fetcher = Fetcher.const_get(fetcher_name.capitalize).new
|
90
112
|
selector = css_selectors.empty? ? Selector::All.new : Selector::Css.new(css_selectors)
|
91
113
|
output_method_name = text ? :text : :raw
|
92
|
-
|
114
|
+
|
115
|
+
document = Nokogiri::HTML.parse(fetcher.(uri))
|
116
|
+
puts(selector.public_send(output_method_name, document))
|
93
117
|
rescue => e
|
94
118
|
$stderr.puts(e.message)
|
95
119
|
exit(1)
|
data/lib/webg/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: webg
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yuya.Nishida.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-05-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: capybara
|