webg 0.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +8 -0
- data/Gemfile +8 -0
- data/LICENSE.txt +22 -0
- data/README.md +29 -0
- data/Rakefile +4 -0
- data/bin/console +15 -0
- data/bin/setup +8 -0
- data/exe/webg +96 -0
- data/lib/webg.rb +8 -0
- data/lib/webg/version.rb +5 -0
- data/webg.gemspec +29 -0
- metadata +84 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 630e12b7cb5e1e1445f5514a76acf815e99d3983d910847cda52bbb8ab57a76a
|
4
|
+
data.tar.gz: a5e929bd8d112f5ffd137164fd61584016dedd003b6b9bcb6fc2181626c02220
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 6aee0f7d77a8cb422d985bf82dd6994d530012b612c086713f2bc0805e9ad0763380fc24ee20b772df5b02a7790c382af28793e73656aee1c120e91ac83079e6
|
7
|
+
data.tar.gz: 99f7fe5ebed8a832ffbc23d37b989a6c464abc47e81ea8c6c62079d9c860de6c1fa82b684f7f70d08430b7953de140ed1bcb552473d72678147b529b182ad3f6
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2020 Yuya.Nishida.
|
2
|
+
|
3
|
+
X11 License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# webg: A downloader that fetches web pages rendered with JavaScript
|
2
|
+
|
3
|
+
## Requirements
|
4
|
+
|
5
|
+
- Ruby
|
6
|
+
- Firefox
|
7
|
+
- Firefox WebDriver support
|
8
|
+
|
9
|
+
## Installation
|
10
|
+
|
11
|
+
```sh
|
12
|
+
$ gem install webg
|
13
|
+
```
|
14
|
+
|
15
|
+
## Usage
|
16
|
+
|
17
|
+
```sh
|
18
|
+
$ webg [options] uri
|
19
|
+
```
|
20
|
+
|
21
|
+
For example, to fetch the result titles of a Google search for 'webg':
|
22
|
+
|
23
|
+
```sh
|
24
|
+
$ webg --text --css-selector='h3.LC20lb' 'https://www.google.com/search?q=webg'
|
25
|
+
```
|
26
|
+
|
27
|
+
## Contributing
|
28
|
+
|
29
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/nishidayuya/webg
|
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require "bundler/setup"
|
5
|
+
require "webg"
|
6
|
+
|
7
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
8
|
+
# with your gem easier. You can also use a different console, if you like.
|
9
|
+
|
10
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
11
|
+
# require "pry"
|
12
|
+
# Pry.start
|
13
|
+
|
14
|
+
require "irb"
|
15
|
+
IRB.start(__FILE__)
|
data/bin/setup
ADDED
data/exe/webg
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "optparse"
|
4
|
+
require "uri"
|
5
|
+
|
6
|
+
require "capybara"
|
7
|
+
|
8
|
+
require "webg/version"
|
9
|
+
|
10
|
+
# Abstract base for the output strategies used by the webg executable.
#
# A concrete selector receives the browser session and produces a String.
# Both extraction methods are deliberately unimplemented here: calling either
# one on the base class raises NotImplementedError.
class Selector
  # Defines #raw(session) and #text(session); each takes exactly one
  # argument and always raises until a subclass overrides it.
  %i[raw text].each do |extractor|
    define_method(extractor) do |_session|
      raise NotImplementedError, "sub class must override."
    end
  end
end
|
19
|
+
|
20
|
+
# Selector used when no CSS filter was requested: it emits the whole page.
class Selector::All < Selector
  # Returns whatever the session reports as its body.
  def raw(session)
    session.body
  end

  # Returns the session's text with a single trailing line break removed.
  def text(session)
    page_text = session.text
    page_text.chomp
  end
end
|
29
|
+
|
30
|
+
# Selector that restricts output to the nodes matching one or more CSS
# selectors.
class Selector::Css < Selector
  # css_selectors: list of selector strings; a node matching any of them is
  # included in the output.
  def initialize(css_selectors)
    @css_selectors = css_selectors
  end

  # Returns the outerHTML of every matching node, one entry per line.
  def raw(session)
    fragments = matched_nodes(session).map do |node|
      node.evaluate_script('this.outerHTML')
    end
    fragments.join("\n")
  end

  # Returns the text of every matching node, one entry per line.
  def text(session)
    matched_nodes(session).map { |node| node.text }.join("\n")
  end

  private

  # Looks up all nodes matching any configured selector by joining the
  # selectors into a single comma-separated selector group.
  def matched_nodes(session)
    selector_group = @css_selectors.join(", ")
    session.all(selector_group)
  end
end
|
51
|
+
|
52
|
+
# Parses the command line of the webg executable.
#
# When no uri argument is present, the help text is written to stderr and the
# process exits with status 1.
#
# @param argv [Array<String>] raw command-line arguments; left unmodified.
# @return [Array] three elements: the target converted with URI(), true/false
#   for --text, and the list of values given via --css-selector (in order).
def parse_options(argv)
  text_only = false
  selectors = []

  parser = OptionParser.new do |opts|
    opts.version = Webg::VERSION
    opts.banner = "Usage: #{File.basename(Process.argv0)} [options] uri"
    opts.separator("")
    opts.separator("Options:")
    opts.on("--css-selector=SELECTOR", "specify css selector to filter output.") do |selector|
      selectors << selector
    end
    opts.on("--text", "output only text") do
      text_only = true
    end
  end

  # OptionParser#parse works on its own copy, so the caller's argv is untouched.
  positional = parser.parse(argv)
  target = positional.first
  unless target
    $stderr.puts(parser.help)
    exit(1)
  end

  [URI(target), text_only, selectors]
end
|
85
|
+
|
86
|
+
# Script entry point: open the requested uri in a Capybara session using the
# :selenium_headless driver and print the selected output to stdout.
# Any StandardError is reported by message on stderr with exit status 1.
begin
  target_uri, text_only, css_selectors = parse_options(ARGV)

  browser = Capybara::Session.new(:selenium_headless)
  browser.visit(target_uri)

  # Without CSS selectors the whole page is emitted.
  extractor =
    if css_selectors.empty?
      Selector::All.new
    else
      Selector::Css.new(css_selectors)
    end

  output =
    if text_only
      extractor.public_send(:text, browser)
    else
      extractor.public_send(:raw, browser)
    end
  puts(output)
rescue => error
  $stderr.puts(error.message)
  exit(1)
end
|
data/lib/webg.rb
ADDED
data/lib/webg/version.rb
ADDED
data/webg.gemspec
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "lib/webg/version"
|
4
|
+
|
5
|
+
# Gem specification for webg: package metadata, packaged file list, the
# executable in exe/, and runtime dependencies.
Gem::Specification.new do |spec|
  spec.name = "webg"
  spec.version = Webg::VERSION
  spec.authors = ["Yuya.Nishida."]
  spec.email = ["yuya@j96.org"]

  spec.summary = "webg: A downloader to get web page with JavaScript"
  spec.homepage = "https://github.com/nishidayuya/webg"
  # Without a license entry the gem is built with an empty license list.
  # The identifier follows the "X11 License" title of the bundled LICENSE.txt.
  # NOTE(review): confirm the intended SPDX identifier with the author.
  spec.license = "X11"
  spec.required_ruby_version = Gem::Requirement.new(">= 2.4.0")

  spec.metadata["homepage_uri"] = spec.homepage
  spec.metadata["source_code_uri"] = spec.homepage

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
    # match? is enough here: only a yes/no answer is needed, not the MatchData.
    `git ls-files -z`.split("\x0").reject { |f| f.match?(%r{\A(?:test|spec|features)/}) }
  end
  spec.bindir = "exe"
  # Every file under exe/ is installed as an executable, by base name.
  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  spec.add_dependency "capybara"
  spec.add_dependency "selenium-webdriver"
end
|
metadata
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: webg
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Yuya.Nishida.
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2021-04-24 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: capybara
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: selenium-webdriver
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
description:
|
42
|
+
email:
|
43
|
+
- yuya@j96.org
|
44
|
+
executables:
|
45
|
+
- webg
|
46
|
+
extensions: []
|
47
|
+
extra_rdoc_files: []
|
48
|
+
files:
|
49
|
+
- ".gitignore"
|
50
|
+
- Gemfile
|
51
|
+
- LICENSE.txt
|
52
|
+
- README.md
|
53
|
+
- Rakefile
|
54
|
+
- bin/console
|
55
|
+
- bin/setup
|
56
|
+
- exe/webg
|
57
|
+
- lib/webg.rb
|
58
|
+
- lib/webg/version.rb
|
59
|
+
- webg.gemspec
|
60
|
+
homepage: https://github.com/nishidayuya/webg
|
61
|
+
licenses: []
|
62
|
+
metadata:
|
63
|
+
homepage_uri: https://github.com/nishidayuya/webg
|
64
|
+
source_code_uri: https://github.com/nishidayuya/webg
|
65
|
+
post_install_message:
|
66
|
+
rdoc_options: []
|
67
|
+
require_paths:
|
68
|
+
- lib
|
69
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
70
|
+
requirements:
|
71
|
+
- - ">="
|
72
|
+
- !ruby/object:Gem::Version
|
73
|
+
version: 2.4.0
|
74
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
75
|
+
requirements:
|
76
|
+
- - ">="
|
77
|
+
- !ruby/object:Gem::Version
|
78
|
+
version: '0'
|
79
|
+
requirements: []
|
80
|
+
rubygems_version: 3.2.15
|
81
|
+
signing_key:
|
82
|
+
specification_version: 4
|
83
|
+
summary: 'webg: A downloader to get web page with JavaScript'
|
84
|
+
test_files: []
|