webg 0.0.0 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4)
  1. checksums.yaml +4 -4
  2. data/exe/webg +43 -19
  3. data/lib/webg/version.rb +1 -1
  4. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 630e12b7cb5e1e1445f5514a76acf815e99d3983d910847cda52bbb8ab57a76a
4
- data.tar.gz: a5e929bd8d112f5ffd137164fd61584016dedd003b6b9bcb6fc2181626c02220
3
+ metadata.gz: 7bdc7060ad44a00a15487e6905f336ecfe2f144940e06253d0073c183236bffd
4
+ data.tar.gz: 897236c858cc3fa6fee45d83677213318a6d11610646fac6214e1cf07e2af553
5
5
  SHA512:
6
- metadata.gz: 6aee0f7d77a8cb422d985bf82dd6994d530012b612c086713f2bc0805e9ad0763380fc24ee20b772df5b02a7790c382af28793e73656aee1c120e91ac83079e6
7
- data.tar.gz: 99f7fe5ebed8a832ffbc23d37b989a6c464abc47e81ea8c6c62079d9c860de6c1fa82b684f7f70d08430b7953de140ed1bcb552473d72678147b529b182ad3f6
6
+ metadata.gz: babb391559cd6b4e67fd86ca7c172b4ed341c378f4a5a3b4d3b3ba569036c48fcafc65143b17990b472aafdca7d11fe680b18fa7825bd4a023c2392dbbcee8ca
7
+ data.tar.gz: a99d118c3041fac293b5efcfb76643521c81a63cb827d489686abe49cb5b2cd068d1cbe54ad938663d6b208beaf95d5e59740bbce468b23e642c59e9d9193422
data/exe/webg CHANGED
@@ -3,36 +3,51 @@
3
3
  require "optparse"
4
4
  require "uri"
5
5
 
6
- require "capybara"
6
+ require "nokogiri"
7
7
 
8
8
  require "webg/version"
9
9
 
10
- class Selector
11
- def raw(session)
12
- raise NotImplementedError, "sub class must override."
13
- end
10
+ module Fetcher
11
+ end
14
12
 
15
- def text(session)
16
- raise NotImplementedError, "sub class must override."
13
+ class Fetcher::Raw
14
+ def call(uri)
15
+ require("open-uri")
16
+ return uri.read
17
17
  end
18
18
  end
19
19
 
20
- class Selector::All < Selector
21
- def raw(session)
20
+ class Fetcher::Firefox
21
+ def call(uri)
22
+ require("capybara")
23
+ session = Capybara::Session.new(:selenium_headless)
24
+ session.visit(uri)
22
25
  return session.body
23
26
  end
27
+ end
24
28
 
25
- def text(session)
26
- return session.text.chomp
29
+ module Selector
30
+ end
31
+
32
+ class Selector::All
33
+ REJECT_TAG_NAMES = %w[script noscript style]
34
+
35
+ def raw(document)
36
+ return document.to_s
37
+ end
38
+
39
+ def text(document)
40
+ document.css(REJECT_TAG_NAMES.join(", ")).remove
41
+ return document.inner_text
27
42
  end
28
43
  end
29
44
 
30
- class Selector::Css < Selector
45
+ class Selector::Css
31
46
  def initialize(css_selectors)
32
47
  @css_selectors = css_selectors
33
48
  end
34
49
 
35
- def raw(session)
50
+ def raw(document)
36
51
  return nodes(session).map { |node|
37
52
  node.evaluate_script('this.outerHTML')
38
53
  }.join("\n")
@@ -51,14 +66,21 @@ end
51
66
 
52
67
  def parse_options(argv)
53
68
  argv = argv.dup
54
- text = false
69
+ fetcher = :raw
55
70
  css_selectors = []
71
+ text = false
56
72
 
57
73
  parser = OptionParser.new
58
74
  parser.version = Webg::VERSION
59
75
  parser.banner = "Usage: #{File.basename(Process.argv0)} [options] uri"
60
76
  parser.separator("")
61
77
  parser.separator("Options:")
78
+ parser.on(
79
+ "--firefox",
80
+ "use Firefox to fetch web page. slow, but JavaScript-evaluatable",
81
+ ) do
82
+ fetcher = :firefox
83
+ end
62
84
  parser.on(
63
85
  "--css-selector=SELECTOR",
64
86
  "specify css selector to filter output.",
@@ -80,16 +102,18 @@ def parse_options(argv)
80
102
  end
81
103
  uri = URI(uri)
82
104
 
83
- return uri, text, css_selectors
105
+ return uri, fetcher, css_selectors, text
84
106
  end
85
107
 
86
108
  begin
87
- uri, text, css_selectors = parse_options(ARGV)
88
- session = Capybara::Session.new(:selenium_headless)
89
- session.visit(uri)
109
+ uri, fetcher_name, css_selectors, text = parse_options(ARGV)
110
+
111
+ fetcher = Fetcher.const_get(fetcher_name.capitalize).new
90
112
  selector = css_selectors.empty? ? Selector::All.new : Selector::Css.new(css_selectors)
91
113
  output_method_name = text ? :text : :raw
92
- puts(selector.public_send(output_method_name, session))
114
+
115
+ document = Nokogiri::HTML.parse(fetcher.(uri))
116
+ puts(selector.public_send(output_method_name, document))
93
117
  rescue => e
94
118
  $stderr.puts(e.message)
95
119
  exit(1)
data/lib/webg/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Webg
4
- VERSION = "0.0.0"
4
+ VERSION = "0.1.0"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: webg
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.0
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yuya.Nishida.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-04-24 00:00:00.000000000 Z
11
+ date: 2021-05-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: capybara