webg 0.0.0 → 0.1.0

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (4)
  1. checksums.yaml +4 -4
  2. data/exe/webg +43 -19
  3. data/lib/webg/version.rb +1 -1
  4. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 630e12b7cb5e1e1445f5514a76acf815e99d3983d910847cda52bbb8ab57a76a
4
- data.tar.gz: a5e929bd8d112f5ffd137164fd61584016dedd003b6b9bcb6fc2181626c02220
3
+ metadata.gz: 7bdc7060ad44a00a15487e6905f336ecfe2f144940e06253d0073c183236bffd
4
+ data.tar.gz: 897236c858cc3fa6fee45d83677213318a6d11610646fac6214e1cf07e2af553
5
5
  SHA512:
6
- metadata.gz: 6aee0f7d77a8cb422d985bf82dd6994d530012b612c086713f2bc0805e9ad0763380fc24ee20b772df5b02a7790c382af28793e73656aee1c120e91ac83079e6
7
- data.tar.gz: 99f7fe5ebed8a832ffbc23d37b989a6c464abc47e81ea8c6c62079d9c860de6c1fa82b684f7f70d08430b7953de140ed1bcb552473d72678147b529b182ad3f6
6
+ metadata.gz: babb391559cd6b4e67fd86ca7c172b4ed341c378f4a5a3b4d3b3ba569036c48fcafc65143b17990b472aafdca7d11fe680b18fa7825bd4a023c2392dbbcee8ca
7
+ data.tar.gz: a99d118c3041fac293b5efcfb76643521c81a63cb827d489686abe49cb5b2cd068d1cbe54ad938663d6b208beaf95d5e59740bbce468b23e642c59e9d9193422
data/exe/webg CHANGED
@@ -3,36 +3,51 @@
3
3
  require "optparse"
4
4
  require "uri"
5
5
 
6
- require "capybara"
6
+ require "nokogiri"
7
7
 
8
8
  require "webg/version"
9
9
 
10
- class Selector
11
- def raw(session)
12
- raise NotImplementedError, "sub class must override."
13
- end
10
+ module Fetcher
11
+ end
14
12
 
15
- def text(session)
16
- raise NotImplementedError, "sub class must override."
13
+ class Fetcher::Raw
14
+ def call(uri)
15
+ require("open-uri")
16
+ return uri.read
17
17
  end
18
18
  end
19
19
 
20
- class Selector::All < Selector
21
- def raw(session)
20
+ class Fetcher::Firefox
21
+ def call(uri)
22
+ require("capybara")
23
+ session = Capybara::Session.new(:selenium_headless)
24
+ session.visit(uri)
22
25
  return session.body
23
26
  end
27
+ end
24
28
 
25
- def text(session)
26
- return session.text.chomp
29
+ module Selector
30
+ end
31
+
32
+ class Selector::All
33
+ REJECT_TAG_NAMES = %w[script noscript style]
34
+
35
+ def raw(document)
36
+ return document.to_s
37
+ end
38
+
39
+ def text(document)
40
+ document.css(REJECT_TAG_NAMES.join(", ")).remove
41
+ return document.inner_text
27
42
  end
28
43
  end
29
44
 
30
- class Selector::Css < Selector
45
+ class Selector::Css
31
46
  def initialize(css_selectors)
32
47
  @css_selectors = css_selectors
33
48
  end
34
49
 
35
- def raw(session)
50
+ def raw(document)
36
51
  return nodes(session).map { |node|
37
52
  node.evaluate_script('this.outerHTML')
38
53
  }.join("\n")
@@ -51,14 +66,21 @@ end
51
66
 
52
67
  def parse_options(argv)
53
68
  argv = argv.dup
54
- text = false
69
+ fetcher = :raw
55
70
  css_selectors = []
71
+ text = false
56
72
 
57
73
  parser = OptionParser.new
58
74
  parser.version = Webg::VERSION
59
75
  parser.banner = "Usage: #{File.basename(Process.argv0)} [options] uri"
60
76
  parser.separator("")
61
77
  parser.separator("Options:")
78
+ parser.on(
79
+ "--firefox",
80
+ "use Firefox to fetch web page. slow, but JavaScript-evaluatable",
81
+ ) do
82
+ fetcher = :firefox
83
+ end
62
84
  parser.on(
63
85
  "--css-selector=SELECTOR",
64
86
  "specify css selector to filter output.",
@@ -80,16 +102,18 @@ def parse_options(argv)
80
102
  end
81
103
  uri = URI(uri)
82
104
 
83
- return uri, text, css_selectors
105
+ return uri, fetcher, css_selectors, text
84
106
  end
85
107
 
86
108
  begin
87
- uri, text, css_selectors = parse_options(ARGV)
88
- session = Capybara::Session.new(:selenium_headless)
89
- session.visit(uri)
109
+ uri, fetcher_name, css_selectors, text = parse_options(ARGV)
110
+
111
+ fetcher = Fetcher.const_get(fetcher_name.capitalize).new
90
112
  selector = css_selectors.empty? ? Selector::All.new : Selector::Css.new(css_selectors)
91
113
  output_method_name = text ? :text : :raw
92
- puts(selector.public_send(output_method_name, session))
114
+
115
+ document = Nokogiri::HTML.parse(fetcher.(uri))
116
+ puts(selector.public_send(output_method_name, document))
93
117
  rescue => e
94
118
  $stderr.puts(e.message)
95
119
  exit(1)
data/lib/webg/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Webg
4
- VERSION = "0.0.0"
4
+ VERSION = "0.1.0"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: webg
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.0
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yuya.Nishida.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-04-24 00:00:00.000000000 Z
11
+ date: 2021-05-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: capybara