webg 0.0.0 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/exe/webg +43 -19
- data/lib/webg/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7bdc7060ad44a00a15487e6905f336ecfe2f144940e06253d0073c183236bffd
|
4
|
+
data.tar.gz: 897236c858cc3fa6fee45d83677213318a6d11610646fac6214e1cf07e2af553
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: babb391559cd6b4e67fd86ca7c172b4ed341c378f4a5a3b4d3b3ba569036c48fcafc65143b17990b472aafdca7d11fe680b18fa7825bd4a023c2392dbbcee8ca
|
7
|
+
data.tar.gz: a99d118c3041fac293b5efcfb76643521c81a63cb827d489686abe49cb5b2cd068d1cbe54ad938663d6b208beaf95d5e59740bbce468b23e642c59e9d9193422
|
data/exe/webg
CHANGED
@@ -3,36 +3,51 @@
|
|
3
3
|
require "optparse"
|
4
4
|
require "uri"
|
5
5
|
|
6
|
-
require "
|
6
|
+
require "nokogiri"
|
7
7
|
|
8
8
|
require "webg/version"
|
9
9
|
|
10
|
-
|
11
|
-
|
12
|
-
raise NotImplementedError, "sub class must override."
|
13
|
-
end
|
10
|
+
module Fetcher
|
11
|
+
end
|
14
12
|
|
15
|
-
|
16
|
-
|
13
|
+
class Fetcher::Raw
|
14
|
+
def call(uri)
|
15
|
+
require("open-uri")
|
16
|
+
return uri.read
|
17
17
|
end
|
18
18
|
end
|
19
19
|
|
20
|
-
class
|
21
|
-
def
|
20
|
+
class Fetcher::Firefox
|
21
|
+
def call(uri)
|
22
|
+
require("capybara")
|
23
|
+
session = Capybara::Session.new(:selenium_headless)
|
24
|
+
session.visit(uri)
|
22
25
|
return session.body
|
23
26
|
end
|
27
|
+
end
|
24
28
|
|
25
|
-
|
26
|
-
|
29
|
+
module Selector
|
30
|
+
end
|
31
|
+
|
32
|
+
class Selector::All
|
33
|
+
REJECT_TAG_NAMES = %w[script noscript style]
|
34
|
+
|
35
|
+
def raw(document)
|
36
|
+
return document.to_s
|
37
|
+
end
|
38
|
+
|
39
|
+
def text(document)
|
40
|
+
document.css(REJECT_TAG_NAMES.join(", ")).remove
|
41
|
+
return document.inner_text
|
27
42
|
end
|
28
43
|
end
|
29
44
|
|
30
|
-
class Selector::Css
|
45
|
+
class Selector::Css
|
31
46
|
def initialize(css_selectors)
|
32
47
|
@css_selectors = css_selectors
|
33
48
|
end
|
34
49
|
|
35
|
-
def raw(
|
50
|
+
def raw(document)
|
36
51
|
return nodes(session).map { |node|
|
37
52
|
node.evaluate_script('this.outerHTML')
|
38
53
|
}.join("\n")
|
@@ -51,14 +66,21 @@ end
|
|
51
66
|
|
52
67
|
def parse_options(argv)
|
53
68
|
argv = argv.dup
|
54
|
-
|
69
|
+
fetcher = :raw
|
55
70
|
css_selectors = []
|
71
|
+
text = false
|
56
72
|
|
57
73
|
parser = OptionParser.new
|
58
74
|
parser.version = Webg::VERSION
|
59
75
|
parser.banner = "Usage: #{File.basename(Process.argv0)} [options] uri"
|
60
76
|
parser.separator("")
|
61
77
|
parser.separator("Options:")
|
78
|
+
parser.on(
|
79
|
+
"--firefox",
|
80
|
+
"use Firefox to fetch web page. slow, but JavaScript-evaluatable",
|
81
|
+
) do
|
82
|
+
fetcher = :firefox
|
83
|
+
end
|
62
84
|
parser.on(
|
63
85
|
"--css-selector=SELECTOR",
|
64
86
|
"specify css selector to filter output.",
|
@@ -80,16 +102,18 @@ def parse_options(argv)
|
|
80
102
|
end
|
81
103
|
uri = URI(uri)
|
82
104
|
|
83
|
-
return uri,
|
105
|
+
return uri, fetcher, css_selectors, text
|
84
106
|
end
|
85
107
|
|
86
108
|
begin
|
87
|
-
uri,
|
88
|
-
|
89
|
-
|
109
|
+
uri, fetcher_name, css_selectors, text = parse_options(ARGV)
|
110
|
+
|
111
|
+
fetcher = Fetcher.const_get(fetcher_name.capitalize).new
|
90
112
|
selector = css_selectors.empty? ? Selector::All.new : Selector::Css.new(css_selectors)
|
91
113
|
output_method_name = text ? :text : :raw
|
92
|
-
|
114
|
+
|
115
|
+
document = Nokogiri::HTML.parse(fetcher.(uri))
|
116
|
+
puts(selector.public_send(output_method_name, document))
|
93
117
|
rescue => e
|
94
118
|
$stderr.puts(e.message)
|
95
119
|
exit(1)
|
data/lib/webg/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: webg
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yuya.Nishida.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-05-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: capybara
|