webg 0.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 630e12b7cb5e1e1445f5514a76acf815e99d3983d910847cda52bbb8ab57a76a
4
+ data.tar.gz: a5e929bd8d112f5ffd137164fd61584016dedd003b6b9bcb6fc2181626c02220
5
+ SHA512:
6
+ metadata.gz: 6aee0f7d77a8cb422d985bf82dd6994d530012b612c086713f2bc0805e9ad0763380fc24ee20b772df5b02a7790c382af28793e73656aee1c120e91ac83079e6
7
+ data.tar.gz: 99f7fe5ebed8a832ffbc23d37b989a6c464abc47e81ea8c6c62079d9c860de6c1fa82b684f7f70d08430b7953de140ed1bcb552473d72678147b529b182ad3f6
data/.gitignore ADDED
@@ -0,0 +1,8 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
data/Gemfile ADDED
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ source "https://rubygems.org"
4
+
5
+ # Specify your gem's dependencies in webg.gemspec
6
+ gemspec
7
+
8
+ gem "rake", "~> 13.0"
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2020 Yuya.Nishida.
2
+
3
+ X11 License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,29 @@
1
+ # webg: A downloader for JavaScript-rendered web pages
2
+
3
+ ## Requirements
4
+
5
+ - Ruby
6
+ - Firefox
7
+ - Firefox WebDriver support
8
+
9
+ ## Installation
10
+
11
+ ```sh
12
+ $ gem install webg
13
+ ```
14
+
15
+ ## Usage
16
+
17
+ ```sh
18
+ $ webg [options] uri
19
+ ```
20
+
21
+ For example, to fetch the titles of the Google search results for 'webg':
22
+
23
+ ```sh
24
+ $ webg --text --css-selector='h3.LC20lb' 'https://www.google.com/search?q=webg'
25
+ ```
26
+
27
+ ## Contributing
28
+
29
+ Bug reports and pull requests are welcome on GitHub at https://github.com/nishidayuya/webg
data/Rakefile ADDED
@@ -0,0 +1,4 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bundler/gem_tasks"
4
+ task default: %i[] # The default task is declared with an empty prerequisite list.
data/bin/console ADDED
@@ -0,0 +1,15 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require "bundler/setup"
5
+ require "webg"
6
+
7
+ # You can add fixtures and/or initialization code here to make experimenting
8
+ # with your gem easier. You can also use a different console, if you like.
9
+
10
+ # (If you use this, don't forget to add pry to your Gemfile!)
11
+ # require "pry"
12
+ # Pry.start
13
+
14
+ require "irb"
15
+ IRB.start(__FILE__)
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
data/exe/webg ADDED
@@ -0,0 +1,96 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "optparse"
4
+ require "uri"
5
+
6
+ require "capybara"
7
+
8
+ require "webg/version"
9
+
10
+ class Selector # Abstract base: defines the raw/text interface that subclasses must override.
11
+ def raw(session) # Always raises NotImplementedError here; subclasses supply the raw output.
12
+ raise NotImplementedError, "sub class must override."
13
+ end
14
+
15
+ def text(session) # Always raises NotImplementedError here; subclasses supply the text output.
16
+ raise NotImplementedError, "sub class must override."
17
+ end
18
+ end
19
+
20
+ class Selector::All < Selector # Selector that outputs the whole page rather than a subset of nodes.
21
+ def raw(session) # Returns session.body unchanged.
22
+ return session.body
23
+ end
24
+
25
+ def text(session) # Returns session.text after chomp (drops one trailing line terminator, assuming a String).
26
+ return session.text.chomp
27
+ end
28
+ end
29
+
30
+ class Selector::Css < Selector # Selector that restricts output to nodes found via the given selectors.
31
+ def initialize(css_selectors) # css_selectors: collection of selector strings (must respond to join).
32
+ @css_selectors = css_selectors
33
+ end
34
+
35
+ def raw(session) # Returns one string: each matched node's outerHTML, separated by "\n".
36
+ return nodes(session).map { |node| # outerHTML is obtained by running a script with the node as `this`.
37
+ node.evaluate_script('this.outerHTML')
38
+ }.join("\n")
39
+ end
40
+
41
+ def text(session) # Returns one string: each matched node's text, separated by "\n".
42
+ return nodes(session).map(&:text).join("\n")
43
+ end
44
+
45
+ private
46
+
47
+ def nodes(session) # Single session.all lookup; selectors are merged into one ", "-separated list (presumably a CSS selector group, so a node matching any of them is returned - confirm against Capybara's default selector type).
48
+ return session.all(@css_selectors.join(", "))
49
+ end
50
+ end
51
+
52
+ def parse_options(argv) # Parses command-line arguments; returns [uri (via URI()), text flag, css selector list].
53
+ argv = argv.dup # Work on a copy so parse!/shift below do not mutate the caller's array.
54
+ text = false
55
+ css_selectors = []
56
+
57
+ parser = OptionParser.new
58
+ parser.version = Webg::VERSION
59
+ parser.banner = "Usage: #{File.basename(Process.argv0)} [options] uri"
60
+ parser.separator("")
61
+ parser.separator("Options:")
62
+ parser.on(
63
+ "--css-selector=SELECTOR",
64
+ "specify css selector to filter output.",
65
+ ) do |selector|
66
+ css_selectors << selector # Appends, so the option may be given more than once.
67
+ end
68
+ parser.on(
69
+ "--text",
70
+ "output only text",
71
+ ) do
72
+ text = true
73
+ end
74
+ parser.parse!(argv) # Removes recognized options from argv, leaving positional arguments.
75
+
76
+ uri = argv.shift # First remaining positional argument is the target URI.
77
+ if !uri # No URI supplied: print usage to stderr and exit with status 1.
78
+ $stderr.puts(parser.help)
79
+ exit(1)
80
+ end
81
+ uri = URI(uri)
82
+
83
+ return uri, text, css_selectors
84
+ end
85
+
86
+ begin # Script entry point: parse arguments, load the page, print the selected output.
87
+ uri, text, css_selectors = parse_options(ARGV)
88
+ session = Capybara::Session.new(:selenium_headless)
89
+ session.visit(uri)
90
+ selector = css_selectors.empty? ? Selector::All.new : Selector::Css.new(css_selectors) # Whole page unless at least one --css-selector was given.
91
+ output_method_name = text ? :text : :raw # --text selects Selector#text, otherwise Selector#raw.
92
+ puts(selector.public_send(output_method_name, session))
93
+ rescue => e # Any StandardError: print only its message to stderr and exit with status 1.
94
+ $stderr.puts(e.message)
95
+ exit(1)
96
+ end
data/lib/webg.rb ADDED
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "webg/version"
4
+
5
+ module Webg # Top-level namespace of the gem.
6
+ class Error < StandardError; end # Gem-specific error class; not raised anywhere in the code shown here.
7
+ # Your code goes here...
8
+ end
data/lib/webg/version.rb ADDED
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Webg
4
+ VERSION = "0.0.0" # Gem version string; read by the gemspec (spec.version) and by the executable (parser.version).
5
+ end
data/webg.gemspec ADDED
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "lib/webg/version"
4
+
5
+ Gem::Specification.new do |spec| # Package definition for the webg gem.
6
+ spec.name = "webg"
7
+ spec.version = Webg::VERSION
8
+ spec.authors = ["Yuya.Nishida."]
9
+ spec.email = ["yuya@j96.org"]
10
+
11
+ spec.summary = "webg: A downloader to get web page with JavaScript"
12
+ spec.homepage = "https://github.com/nishidayuya/webg"
13
+ spec.required_ruby_version = Gem::Requirement.new(">= 2.4.0")
14
+
15
+ spec.metadata["homepage_uri"] = spec.homepage
16
+ spec.metadata["source_code_uri"] = spec.homepage
17
+ # NOTE(review): no spec.license / spec.licenses is set here although LICENSE.txt (X11 License) is shipped - confirm whether a license should be declared.
18
+ # Specify which files should be added to the gem when it is released.
19
+ # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
20
+ spec.files = Dir.chdir(File.expand_path(__dir__)) do
21
+ `git ls-files -z`.split("\x0").reject { |f| f.match(%r{\A(?:test|spec|features)/}) } # Excludes anything under test/, spec/ or features/.
22
+ end
23
+ spec.bindir = "exe"
24
+ spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) } # Every packaged file under exe/ becomes an executable, by basename.
25
+ spec.require_paths = ["lib"]
26
+
27
+ spec.add_dependency "capybara" # No version constraint is given.
28
+ spec.add_dependency "selenium-webdriver" # No version constraint is given.
29
+ end
metadata ADDED
@@ -0,0 +1,84 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: webg
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Yuya.Nishida.
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2021-04-24 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: capybara
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: selenium-webdriver
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ description:
42
+ email:
43
+ - yuya@j96.org
44
+ executables:
45
+ - webg
46
+ extensions: []
47
+ extra_rdoc_files: []
48
+ files:
49
+ - ".gitignore"
50
+ - Gemfile
51
+ - LICENSE.txt
52
+ - README.md
53
+ - Rakefile
54
+ - bin/console
55
+ - bin/setup
56
+ - exe/webg
57
+ - lib/webg.rb
58
+ - lib/webg/version.rb
59
+ - webg.gemspec
60
+ homepage: https://github.com/nishidayuya/webg
61
+ licenses: []
62
+ metadata:
63
+ homepage_uri: https://github.com/nishidayuya/webg
64
+ source_code_uri: https://github.com/nishidayuya/webg
65
+ post_install_message:
66
+ rdoc_options: []
67
+ require_paths:
68
+ - lib
69
+ required_ruby_version: !ruby/object:Gem::Requirement
70
+ requirements:
71
+ - - ">="
72
+ - !ruby/object:Gem::Version
73
+ version: 2.4.0
74
+ required_rubygems_version: !ruby/object:Gem::Requirement
75
+ requirements:
76
+ - - ">="
77
+ - !ruby/object:Gem::Version
78
+ version: '0'
79
+ requirements: []
80
+ rubygems_version: 3.2.15
81
+ signing_key:
82
+ specification_version: 4
83
+ summary: 'webg: A downloader to get web page with JavaScript'
84
+ test_files: []