meat_sauce 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +17 -0
- data/Gemfile +15 -0
- data/LICENSE.txt +22 -0
- data/README.md +93 -0
- data/Rakefile +1 -0
- data/bin/meat_sauce +9 -0
- data/lib/meat_sauce.rb +17 -0
- data/lib/meat_sauce/cli.rb +126 -0
- data/lib/meat_sauce/show_source.thor +20 -0
- data/lib/meat_sauce/version.rb +3 -0
- data/meat_sauce.gemspec +27 -0
- metadata +123 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
source 'https://rubygems.org'
|
2
|
+
|
3
|
+
# Specify your gem's dependencies in meat_sauce.gemspec
|
4
|
+
gemspec
|
5
|
+
|
6
|
+
|
7
|
+
gem "pry-debugger", "~> 0.2.1"
|
8
|
+
gem 'pry-coolline'
|
9
|
+
gem 'pry-remote'
|
10
|
+
|
11
|
+
gem 'hirb'
|
12
|
+
gem 'hirb-unicode'
|
13
|
+
gem 'awesome_print'
|
14
|
+
|
15
|
+
gem 'rspec'
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2013 y.fujii
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,93 @@
|
|
1
|
+
# MeatSauce
|
2
|
+
|
3
|
+
ページをスクレイピングしたり、スクリーンショットを撮ったり
|
4
|
+
Scrape web pages, take screenshots of web pages, and more.
|
5
|
+
|
6
|
+
## Installation
|
7
|
+
|
8
|
+
Add this line to your application's Gemfile:
|
9
|
+
|
10
|
+
gem 'meat_sauce'
|
11
|
+
|
12
|
+
And then execute:
|
13
|
+
|
14
|
+
$ bundle
|
15
|
+
|
16
|
+
Or install it yourself as:
|
17
|
+
|
18
|
+
$ gem install meat_sauce
|
19
|
+
|
20
|
+
## Usage
|
21
|
+
|
22
|
+
### 使えるコマンド表示(show subcommand)
|
23
|
+
```ruby
|
24
|
+
$ meat_sauce
|
25
|
+
Tasks:
|
26
|
+
meat_sauce capture -f, --file-path=FILE_PATH # 指定した深さまで画面のスクリーンショットを取ります(Screenshot the link of the specified depth page)
|
27
|
+
meat_sauce help [TASK] # Describe available tasks or one specific task
|
28
|
+
meat_sauce scraping -t, --tag=TAG # 指定したタグの中身を抜き出します(specific tag inside extracted)
|
29
|
+
meat_sauce show_links # 指定した深さまでリンクの一覧を表示します(Display the link of the specified depth)
|
30
|
+
meat_sauce show_source # ソースを表示します(retrive http source)
|
31
|
+
|
32
|
+
Options:
|
33
|
+
-u, [--url=URL]
|
34
|
+
-b, [--browser=BROWSER]
|
35
|
+
# Default: firefox
|
36
|
+
```
|
37
|
+
|
38
|
+
|
39
|
+
### コマンド詳細(command description)
|
40
|
+
|
41
|
+
**各コマンド共通(every method common option)**
|
42
|
+
|
43
|
+
option | abbreviation | explain
|
44
|
+
:---------- |:---------- |:----------
|
45
|
+
--url |-u | target url
|
46
|
+
--browser | -b | select browser(default firefox)
|
47
|
+
|
48
|
+
<br />
|
49
|
+
|
50
|
+
**サブコマンド(sub command)**
|
51
|
+
|
52
|
+
+ **`capture`**
|
53
|
+
screen shot web page
|
54
|
+
WEBページのスクリーンショットを撮ります
|
55
|
+
|
56
|
+
option | abbreviation | explain
|
57
|
+
:---------- |:---------- |:----------
|
58
|
+
--file-path |-f | 画像の保存先を指定します(save file path)
|
59
|
+
--depth-limit | -d | URLを起点に取得するリンクの深さを指定<br />(The depth of the link acquired with URL as the starting point is specified. default 0)
|
60
|
+
|
61
|
+
+ **`scraping`**
|
62
|
+
retrieve text inner tag
|
63
|
+
WEBページのタグの中身を取得します
|
64
|
+
|
65
|
+
option | abbreviation | explain
|
66
|
+
:---------- |:---------- |:----------
|
67
|
+
--tag | -t | target tag
|
68
|
+
--only-text | -o | text only (default true)
|
69
|
+
|
70
|
+
<br />
|
71
|
+
|
72
|
+
+ **`show_links`**
|
73
|
+
get all links in the page
|
74
|
+
ページの全てのリンクを取得します
|
75
|
+
|
76
|
+
option | abbreviation | explain
|
77
|
+
:---------- |:---------- |:----------
|
78
|
+
--depth-limit | -d | URLを起点に取得するリンクの深さを指定<br />(The depth of the link acquired with URL as the starting point is specified. default 0)
|
79
|
+
|
80
|
+
<br />
|
81
|
+
|
82
|
+
+ **`show_source`**
|
83
|
+
get http source
|
84
|
+
ページのソースを取得します
|
85
|
+
|
86
|
+
|
87
|
+
## Contributing
|
88
|
+
|
89
|
+
1. Fork it
|
90
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
91
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
92
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
93
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
data/bin/meat_sauce
ADDED
data/lib/meat_sauce.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require "meat_sauce/version"
|
3
|
+
require "meat_sauce/cli"
|
4
|
+
|
5
|
+
module MeatSauce
|
6
|
+
# 一旦なしよ
|
7
|
+
# Loads every Thor task file found beneath +dir+.
#
# The working directory is switched to +dir+ for the duration of the call,
# so the glob pattern and the paths handed to Thor are relative to it.
# Matches that are not regular files (e.g. a directory named "x.thor")
# are skipped.
#
# @param dir [String] directory that is searched recursively for *.thor files
# @return [Array<String>] the relative paths that were passed to Thor
def self.load_thorfiles(dir)
  Dir.chdir(dir) do
    candidates = Dir.glob('**/*.thor').select { |path| File.file?(path) }
    candidates.each { |path| Thor::Util.load_thorfile(path) }
  end
end
|
15
|
+
|
16
|
+
# MeatSauce.load_thorfiles('lib/meat_sauce')
|
17
|
+
end
|
@@ -0,0 +1,126 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require "thor"
|
3
|
+
require "nokogiri"
|
4
|
+
require "selenium-webdriver"
|
5
|
+
require "anemone"
|
6
|
+
|
7
|
+
#require 'pry'
|
8
|
+
|
9
|
+
module MeatSauce
|
10
|
+
class UrlFormatError < StandardError;end
|
11
|
+
class CLI < Thor
|
12
|
+
# class_option :url, :required => true, :aliases => "-u" なんかhelp見れなくなるからrequireをコメントアウト
|
13
|
+
class_option :url, aliases: "-u"
|
14
|
+
class_option :browser , type: :string, default: "firefox", aliases: "-b"
|
15
|
+
|
16
|
+
desc "scraping", "指定したタグの中身を抜き出します(specific tag inside extracted)" # {{{
|
17
|
+
method_option :tag, :required => true, :aliases => "-t"
|
18
|
+
method_option :only_text, type: :boolean, default: true, aliases: "-o"
|
19
|
+
# Prints the contents of every element matching the CSS selector given
# in the +--tag+ option.
#
# The page is fetched through the browser driver and its source is parsed
# with Nokogiri. With +--only-text+ set the inner text of each match is
# printed, otherwise its inner HTML.
def scraping
  open_driver do |driver|
    document = Nokogiri::HTML(driver.page_source)

    document.css(options[:tag]).each do |node|
      puts(options[:only_text] ? node.inner_text : node.inner_html)
    end
  end
end # }}}
|
34
|
+
|
35
|
+
desc "show_source", "ソースを表示します(retrive http source)" # {{{
|
36
|
+
# Prints the page source of the target URL as reported by the browser driver.
def show_source
  open_driver { |driver| puts driver.page_source }
end # }}}
|
41
|
+
|
42
|
+
desc "show_links", "指定した深さまでリンクの一覧を表示します(Display the link of the specified depth)" # {{{
|
43
|
+
method_option :depth_limit, type: :numeric, aliases: "-d"
|
44
|
+
method_option :skip_query_string, type: :boolean, default: true, aliases: "-s"
|
45
|
+
# Crawls the target URL with Anemone and collects the URL of every page
# the crawler visits.
#
# The +--skip-query-string+ option is always forwarded to Anemone;
# +--depth-limit+ is forwarded only when it was supplied.
#
# @param call_as_function [Boolean] when true the collected URLs are
#   returned instead of printed (this is how +capture+ uses the method)
# @return [Array, nil] the collected page URLs when +call_as_function+ is
#   true; otherwise nil after printing them one per line
def show_links(call_as_function = false)
  crawl_opts = { skip_query_strings: options[:skip_query_string] }
  depth = options[:depth_limit]
  crawl_opts[:depth_limit] = depth unless depth.nil?

  found = []
  Anemone.crawl(url_complement(options[:url]), crawl_opts) do |crawler|
    crawler.on_every_page { |page| found.push(page.url) }
  end

  return found if call_as_function

  puts found.join("\n")
end # }}}
|
64
|
+
|
65
|
+
desc "capture", "指定した深さまで画面のスクリーンショットを取ります(Screenshot the link of the specified depth page)" # {{{
|
66
|
+
method_option :file_path, required: true, aliases: "-f"
|
67
|
+
method_option :depth_limit, type: :numeric, default: 0, aliases: "-d"
|
68
|
+
# Saves screenshots of the target page, or of every page reachable within
# +--depth-limit+ links of it.
#
# * depth limit 0 (the default): one screenshot of the target URL is
#   written to +--file-path+.
# * otherwise: the site is crawled via +show_links+ and each page found is
#   opened and saved to +--file-path+ with "_<index>" inserted before the
#   extension.
#
# The crawl is only performed when its result is actually needed, and it
# runs inside the +open_driver+ block, so a failure while crawling goes
# through the same error handling as any other failure in that block.
def capture
  open_driver do |driver|
    if options[:depth_limit] == 0
      # The driver is already on the target URL; just take the shot.
      driver.save_screenshot(options[:file_path])
    else
      show_links(true).each_with_index do |link, idx|
        driver.navigate.to(link.to_s)
        driver.save_screenshot(path_add_suffix(options[:file_path], idx))
      end
    end
  end
end # }}}
|
83
|
+
|
84
|
+
private # {{{
|
85
|
+
# 有効なURL書式かどうか
|
86
|
+
# Normalizes the value of the +--url+ option into a URL with a scheme.
#
# A value that does not already start with "http://" or "https://"
# (case-insensitive) gets "http://" prepended; values that already carry
# one of those schemes are returned unchanged.
#
# @param url_path [String, nil] raw value of the +--url+ option
# @return [String] the URL including its scheme
# @raise [UrlFormatError] when no URL was supplied, or when the resulting
#   string contains nothing that the URI pattern recognises
def url_complement(url_path)
  if url_path.nil?
    say("'--url' または '-u'オプションでurlを指定してください (No value provided for required options '--url')", :red)
    # Abort here; falling through would call String methods on nil.
    raise UrlFormatError, "invalid url"
  end

  # Do not prepend "http://" to a URL that already names http or https.
  url = (url_path =~ %r{\Ahttps?://}i) ? url_path : "http://" + url_path

  # URI.regexp is obsolete; ask the default parser for the pattern instead.
  raise UrlFormatError, "invalid url" if URI::DEFAULT_PARSER.make_regexp.match(url).nil?

  url
end
|
99
|
+
|
100
|
+
# Starts the browser selected by +--browser+, opens the target URL and
# yields the driver to the given block.
#
# Any StandardError raised while starting the browser, navigating, or
# inside the block is reported with +say+ (in red) instead of being
# propagated. The browser is always closed afterwards if it was started.
#
# @yieldparam driver the object returned by Selenium::WebDriver.for
def open_driver
  # Start the browser (can be changed to :chrome, :firefox, :safari, :ie, :opera, ...).
  driver = Selenium::WebDriver.for(options[:browser].to_sym)

  # Open the target URL.
  driver.navigate.to(url_complement(options[:url]))

  yield(driver) if block_given?

rescue => ex
  say ex.message, :red
ensure
  # Close the browser. `driver` is nil when start-up itself failed;
  # `defined?(driver)` cannot detect that because it is always truthy for
  # a local variable assigned in this method, so test the value instead.
  driver.quit if driver
end
|
116
|
+
|
117
|
+
# Builds a sibling path of +path+ with "_<idx>" inserted before the
# file extension, e.g. ("/tmp/shot.png", 3) => "/tmp/shot_3.png".
#
# The directory and file name are combined with File.join, so a file in
# the root directory yields "/shot_3.png" rather than "//shot_3.png".
#
# @param path [String] original file path
# @param idx [#to_s] suffix to append to the base name
# @return [String] the suffixed path
def path_add_suffix(path, idx)
  ext_name  = File.extname(path)
  file_name = File.basename(path, ext_name)

  File.join(File.dirname(path), "#{file_name}_#{idx}#{ext_name}")
end
|
124
|
+
# end private }}}
|
125
|
+
end
|
126
|
+
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# Thor task class that redefines the +show_source+ command of
# MeatSauce::CLI with a required +--url+ option.
class ShowSource < MeatSauce::CLI
  # desc "<usage>", "<command description>"
  desc "show_source", "retrive http source"
  # The method name is used as the sub-command name.
  method_option :url, :required => true, :aliases => "-u"
  # NOTE(review): +--tag+ is declared here but not read by show_source.
  method_option :tag, :type => :string, :default => "", :aliases => "-t"
  # Prints the page source of +--url+ as reported by the browser driver.
  # Unlike MeatSauce::CLI#open_driver there is no rescue here, so errors
  # propagate to the caller after the browser has been closed.
  def show_source
    # Start the browser (can be changed to :chrome, :firefox, :safari, :ie, :opera, ...).
    driver = Selenium::WebDriver.for(options[:browser].to_sym)

    # Open the requested URL.
    driver.navigate.to(url_complement(options[:url]))

    puts driver.page_source
  ensure
    # Close the browser. `driver` is nil when start-up failed, and
    # `defined?(driver)` is always truthy for a local assigned in this
    # method, so test the value to avoid calling quit on nil.
    driver.quit if driver
  end
end
|
data/meat_sauce.gemspec
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'meat_sauce/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |spec|
|
7
|
+
spec.name = "meat_sauce"
|
8
|
+
spec.version = MeatSauce::VERSION
|
9
|
+
spec.authors = ["y.fujii"]
|
10
|
+
spec.email = ["ishikurasakura@gmail.com"]
|
11
|
+
spec.description = %q{ページをスクレイピングしたり、スクリーンショットを撮ったり(scraping web page, screen shot web page .... and more)}
|
12
|
+
spec.summary = %q{web util tool}
|
13
|
+
spec.homepage = ""
|
14
|
+
spec.license = "MIT"
|
15
|
+
|
16
|
+
spec.files = `git ls-files`.split($/)
|
17
|
+
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
18
|
+
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
|
+
spec.require_paths = ["lib"]
|
20
|
+
|
21
|
+
#spec.add_development_dependency "bundler", "~> 1.3"
|
22
|
+
#spec.add_development_dependency "rake"
|
23
|
+
spec.add_dependency('thor', '~> 0.17.0')
|
24
|
+
spec.add_dependency('selenium-webdriver', '~> 2.35.1')
|
25
|
+
spec.add_dependency('nokogiri', '~> 1.6.0')
|
26
|
+
spec.add_dependency('anemone', '~> 0.7.2')
|
27
|
+
end
|
metadata
ADDED
@@ -0,0 +1,123 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: meat_sauce
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- y.fujii
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-09-11 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: thor
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ~>
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 0.17.0
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ~>
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 0.17.0
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: selenium-webdriver
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ~>
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: 2.35.1
|
38
|
+
type: :runtime
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ~>
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: 2.35.1
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: nokogiri
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ~>
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: 1.6.0
|
54
|
+
type: :runtime
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ~>
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 1.6.0
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: anemone
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ~>
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: 0.7.2
|
70
|
+
type: :runtime
|
71
|
+
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ~>
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: 0.7.2
|
78
|
+
description: ページをスクレイピングしたり、スクリーンショットを撮ったり(scraping web page, screen shot web page
|
79
|
+
.... and more)
|
80
|
+
email:
|
81
|
+
- ishikurasakura@gmail.com
|
82
|
+
executables:
|
83
|
+
- meat_sauce
|
84
|
+
extensions: []
|
85
|
+
extra_rdoc_files: []
|
86
|
+
files:
|
87
|
+
- .gitignore
|
88
|
+
- Gemfile
|
89
|
+
- LICENSE.txt
|
90
|
+
- README.md
|
91
|
+
- Rakefile
|
92
|
+
- bin/meat_sauce
|
93
|
+
- lib/meat_sauce.rb
|
94
|
+
- lib/meat_sauce/cli.rb
|
95
|
+
- lib/meat_sauce/show_source.thor
|
96
|
+
- lib/meat_sauce/version.rb
|
97
|
+
- meat_sauce.gemspec
|
98
|
+
homepage: ''
|
99
|
+
licenses:
|
100
|
+
- MIT
|
101
|
+
post_install_message:
|
102
|
+
rdoc_options: []
|
103
|
+
require_paths:
|
104
|
+
- lib
|
105
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
106
|
+
none: false
|
107
|
+
requirements:
|
108
|
+
- - ! '>='
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
111
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
112
|
+
none: false
|
113
|
+
requirements:
|
114
|
+
- - ! '>='
|
115
|
+
- !ruby/object:Gem::Version
|
116
|
+
version: '0'
|
117
|
+
requirements: []
|
118
|
+
rubyforge_project:
|
119
|
+
rubygems_version: 1.8.23
|
120
|
+
signing_key:
|
121
|
+
specification_version: 3
|
122
|
+
summary: web util tool
|
123
|
+
test_files: []
|