meat_sauce 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,15 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in meat_sauce.gemspec
4
+ gemspec
5
+
6
+
7
+ gem "pry-debugger", "~> 0.2.1"
8
+ gem 'pry-coolline'
9
+ gem 'pry-remote'
10
+
11
+ gem 'hirb'
12
+ gem 'hirb-unicode'
13
+ gem 'awesome_print'
14
+
15
+ gem 'rspec'
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 y.fujii
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,93 @@
1
+ # MeatSauce
2
+
3
+ ページをスクレイピングしたり、スクリーンショットを撮ったり
4
+ Scrape web pages, take screenshots of web pages, and more.
5
+
6
+ ## Installation
7
+
8
+ Add this line to your application's Gemfile:
9
+
10
+ gem 'meat_sauce'
11
+
12
+ And then execute:
13
+
14
+ $ bundle
15
+
16
+ Or install it yourself as:
17
+
18
+ $ gem install meat_sauce
19
+
20
+ ## Usage
21
+
22
+ ### 使えるコマンド表示(show subcommand)
23
+ ```ruby
24
+ $ meat_sauce
25
+ Tasks:
26
+ meat_sauce capture -f, --file-path=FILE_PATH # 指定した深さまで画面のスクリーンショットを取ります(Screenshot the link of the specified depth page)
27
+ meat_sauce help [TASK] # Describe available tasks or one specific task
28
+ meat_sauce scraping -t, --tag=TAG # 指定したタグの中身を抜き出します(specific tag inside extracted)
29
+ meat_sauce show_links # 指定した深さまでリンクの一覧を表示します(Display the link of the specified depth)
30
+ meat_sauce show_source # ソースを表示します(retrive http source)
31
+
32
+ Options:
33
+ -u, [--url=URL]
34
+ -b, [--browser=BROWSER]
35
+ # Default: firefox
36
+ ```
37
+
38
+
39
+ ### コマンド詳細(command description)
40
+
41
+ **各コマンド共通(every method common option)**
42
+
43
+ option | abbreviation | explain
44
+ :---------- |:---------- |:----------
45
+ --url |-u | target url
46
+ --browser | -b | select browser(default firefox)
47
+
48
+ <br />
49
+
50
+ **サブコマンド(sub command)**
51
+
52
+ + **`capture`**
53
+ screen shot web page
54
+ WEBページのスクリーンショットを撮ります
55
+
56
+ option | abbreviation | explain
57
+ :---------- |:---------- |:----------
58
+ --file-path |-f | 画像の保存先を指定します(save file path)
59
+ --depth-limit | -d | URLを起点に取得するリンクの深さを指定<br />(The depth of the link acquired with URL as the starting point is specified. default 0)
60
+
61
+ + **`scraping`**
62
+ retrieve the inner text (or inner HTML) of a tag
63
+ WEBページのタグの中身を取得します
64
+
65
+ option | abbreviation | explain
66
+ :---------- |:---------- |:----------
67
+ --tag | -t | target tag
68
+ --only-text | -o | text only (default true)
69
+
70
+ <br />
71
+
72
+ + **`show_links`**
73
+ get all links in the page
74
+ ページの全てのリンクを取得します
75
+
76
+ option | abbreviation | explain
77
+ :---------- |:---------- |:----------
78
+ --depth-limit | -d | URLを起点に取得するリンクの深さを指定<br />(The depth of the link acquired with URL as the starting point is specified. default 0)
79
+
80
+ <br />
81
+
82
+ + **`show_source`**
83
+ get http source
84
+ ページのソースを取得します
85
+
86
+
87
+ ## Contributing
88
+
89
+ 1. Fork it
90
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
91
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
92
+ 4. Push to the branch (`git push origin my-new-feature`)
93
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/bin/meat_sauce ADDED
@@ -0,0 +1,9 @@
1
+ #! /usr/bin/env ruby
2
+
3
require "pathname"

# Put the gem's own lib/ directory (the sibling of this script's directory)
# at the front of the load path so the executable also works from a checkout.
lib_dir = Pathname.new(__FILE__).dirname.parent.join("lib").expand_path
$LOAD_PATH.unshift(lib_dir.to_s)

require 'meat_sauce'

# Hand control over to the Thor-based command line interface.
MeatSauce::CLI.start
data/lib/meat_sauce.rb ADDED
@@ -0,0 +1,17 @@
1
+ # encoding: utf-8
2
+ require "meat_sauce/version"
3
+ require "meat_sauce/cli"
4
+
5
module MeatSauce
  # Loads every regular file matching **/*.thor found below +dir+ through
  # Thor::Util.load_thorfile. Paths are resolved relative to +dir+ because the
  # working directory is switched to it for the duration of the call.
  #
  # Disabled for now: the invocation at the bottom of this module is
  # commented out.
  #
  # @param dir [String] directory to search recursively
  # @return [Array<String>] the relative paths of the thor files that were loaded
  def self.load_thorfiles(dir)
    Dir.chdir(dir) do
      Dir.glob('**/*.thor')
         .select { |candidate| File.file?(candidate) }
         .each { |thorfile| Thor::Util.load_thorfile(thorfile) }
    end
  end

  # MeatSauce.load_thorfiles('lib/meat_sauce')
end
@@ -0,0 +1,126 @@
1
+ # encoding: utf-8
2
+ require "thor"
3
+ require "nokogiri"
4
+ require "selenium-webdriver"
5
+ require "anemone"
6
+
7
+ #require 'pry'
8
+
9
+ module MeatSauce
10
+ class UrlFormatError < StandardError;end
11
+ class CLI < Thor
12
+ # class_option :url, :required => true, :aliases => "-u" なんかhelp見れなくなるからrequireをコメントアウト
13
+ class_option :url, aliases: "-u"
14
+ class_option :browser , type: :string, default: "firefox", aliases: "-b"
15
+
16
+ desc "scraping", "指定したタグの中身を抜き出します(specific tag inside extracted)" # {{{
17
+ method_option :tag, :required => true, :aliases => "-t"
18
+ method_option :only_text, type: :boolean, default: true, aliases: "-o"
19
# Prints the content of every element matching the CSS selector given by
# options[:tag] in the page rendered by the browser. Prints the inner text
# when options[:only_text] is truthy, otherwise the inner HTML.
def scraping
  open_driver do |driver|
    document = Nokogiri::HTML(driver.page_source)
    extractor = options[:only_text] ? :inner_text : :inner_html

    document.css(options[:tag]).each do |node|
      puts node.public_send(extractor)
    end
  end
end # }}}
34
+
35
+ desc "show_source", "ソースを表示します(retrive http source)" # {{{
36
# Prints the page source reported by the browser for the target URL.
def show_source
  open_driver { |driver| puts driver.page_source }
end # }}}
41
+
42
+ desc "show_links", "指定した深さまでリンクの一覧を表示します(Display the link of the specified depth)" # {{{
43
+ method_option :depth_limit, type: :numeric, aliases: "-d"
44
+ method_option :skip_query_string, type: :boolean, default: true, aliases: "-s"
45
# Crawls the target URL with Anemone and collects the URL of every page
# visited.
#
# The crawl receives :skip_query_strings from options[:skip_query_string],
# and :depth_limit only when options[:depth_limit] was supplied.
#
# @param call_as_function [Boolean] when truthy, return the collected URLs
#   instead of printing them (used by #capture)
# @return [Array, nil] the visited URLs when +call_as_function+ is truthy;
#   otherwise nil after printing one URL per line
def show_links(call_as_function = false)
  crawl_opts = { skip_query_strings: options[:skip_query_string] }
  depth = options[:depth_limit]
  crawl_opts[:depth_limit] = depth unless depth.nil?

  visited = []
  Anemone.crawl(url_complement(options[:url]), crawl_opts) do |crawler|
    crawler.on_every_page { |page| visited << page.url }
  end

  return visited if call_as_function

  puts visited.join("\n")
end # }}}
64
+
65
+ desc "capture", "指定した深さまで画面のスクリーンショットを取ります(Screenshot the link of the specified depth page)" # {{{
66
+ method_option :file_path, required: true, aliases: "-f"
67
+ method_option :depth_limit, type: :numeric, default: 0, aliases: "-d"
68
# Saves screenshots of the target page, or of every page reachable from it.
#
# With options[:depth_limit] == 0 a single screenshot of the target URL is
# written to options[:file_path]. Otherwise the site is crawled via
# #show_links and one screenshot per discovered link is written, each file
# name carrying the link's index as a suffix (see #path_add_suffix).
#
# The crawl is only performed when it is actually needed: at depth 0 its
# result would be discarded, and skipping it avoids fetching the page twice.
def capture
  single_page = options[:depth_limit] == 0
  links = single_page ? [] : show_links(true)

  open_driver do |driver|
    if single_page
      # open_driver has already navigated to the target URL.
      driver.save_screenshot(options[:file_path])
    else
      links.each_with_index do |link, idx|
        driver.navigate.to(link.to_s)
        driver.save_screenshot(path_add_suffix(options[:file_path], idx))
      end
    end
  end
end # }}}
83
+
84
+ private # {{{
85
+ # 有効なURL書式かどうか
86
# Normalizes and validates the URL given on the command line.
#
# A missing scheme is completed with "http://"; URLs that already start with
# "http://" or "https://" are left untouched. The result must parse as an
# HTTP(S) URI with a non-empty host.
#
# @param url_path [String, nil] the raw value of the --url option
# @return [String] the URL including its scheme
# @raise [UrlFormatError] when no URL was given or it is not a valid
#   HTTP(S) URL
def url_complement(url_path)
  if url_path.nil?
    say("'--url' または '-u'オプションでurlを指定してください (No value provided for required options '--url')", :red)
    # Stop here instead of failing later with NoMethodError on nil.
    raise UrlFormatError, "invalid url"
  end

  url = url_path.to_s
  # Only prepend the default scheme when none of the supported ones is present,
  # so "https://..." is not turned into "http://https://...".
  url = "http://" + url unless url =~ %r{\Ahttps?://}i

  begin
    parsed = URI.parse(url)
  rescue URI::InvalidURIError
    raise UrlFormatError, "invalid url"
  end

  valid = parsed.is_a?(URI::HTTP) && !parsed.host.nil? && !parsed.host.empty?
  raise UrlFormatError, "invalid url" unless valid

  url
end
99
+
100
# Launches the browser selected by options[:browser], navigates to the target
# URL and yields the driver to the given block. The browser is always closed
# afterwards.
#
# Any StandardError raised while validating the URL, launching the browser,
# navigating or running the block is reported in red via #say instead of
# being propagated.
#
# @yieldparam driver the Selenium driver, already showing the target URL
def open_driver
  driver = nil

  # Validate the URL first so an invalid one never launches a browser.
  target = url_complement(options[:url])

  # Launch the browser
  # (can be switched to :chrome, :firefox, :safari, :ie, :opera, ...).
  driver = Selenium::WebDriver.for(options[:browser].to_sym)

  # Open the URL.
  driver.navigate.to(target)

  yield(driver) if block_given?
rescue => ex
  say ex.message, :red
ensure
  # Close the browser. `defined?(driver)` is always truthy for a local
  # variable, so test the value itself: it is still nil when launching failed.
  driver.quit if driver
end
116
+
117
# Inserts "_<idx>" between the base name and the extension of +path+.
#
# The directory part always comes from File.dirname, so a bare file name is
# prefixed with "./".
#
# @param path [String] target file path, e.g. "shots/page.png"
# @param idx [Integer] suffix to append to the file name
# @return [String] e.g. "shots/page_3.png"
def path_add_suffix(path, idx)
  extension = File.extname(path)
  stem = File.basename(path, extension)

  "#{File.dirname(path)}/#{stem}_#{idx}#{extension}"
end
124
+ # end private }}}
125
+ end
126
+ end
@@ -0,0 +1,20 @@
1
# Stand-alone Thor task that prints the source of a web page.
class ShowSource < MeatSauce::CLI
  # desc "usage", "command description"
  desc "show_source", "retrive http source"
  # The method name is the subcommand name.
  method_option :url, :required => true, :aliases => "-u"
  method_option :tag, :type => :string, :default => "", :aliases => "-t"
  # Opens options[:url] in the browser chosen by options[:browser], prints the
  # page source and closes the browser again.
  def show_source
    driver = nil

    # Launch the browser
    # (can be switched to :chrome, :firefox, :safari, :ie, :opera, ...).
    driver = Selenium::WebDriver.for(options[:browser].to_sym)

    # Open the target URL.
    driver.navigate.to(url_complement(options[:url]))

    puts driver.page_source
  ensure
    # Close the browser. `defined?(driver)` is always truthy for a local
    # variable, so test the value itself: it is still nil when launching failed.
    driver.quit if driver
  end
end
@@ -0,0 +1,3 @@
1
module MeatSauce
  # Version of the gem; read by the gemspec.
  VERSION = '0.0.1'
end
@@ -0,0 +1,27 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'meat_sauce/version'
5
+
6
# Gem specification for meat_sauce. Packaged files are taken from
# `git ls-files`, so the gem has to be built from inside a git checkout.
Gem::Specification.new do |spec|
  spec.name          = "meat_sauce"
  spec.version       = MeatSauce::VERSION
  spec.authors       = ["y.fujii"]
  spec.email         = ["ishikurasakura@gmail.com"]
  spec.description   = %q{ページをスクレイピングしたり、スクリーンショットを撮ったり(scraping web page, screen shot web page .... and more)}
  spec.summary       = %q{web util tool}
  spec.homepage      = ""
  spec.license       = "MIT"

  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}) { |path| File.basename(path) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  #spec.add_development_dependency "bundler", "~> 1.3"
  #spec.add_development_dependency "rake"

  # Runtime dependencies, declared in this order.
  {
    'thor'               => '~> 0.17.0',
    'selenium-webdriver' => '~> 2.35.1',
    'nokogiri'           => '~> 1.6.0',
    'anemone'            => '~> 0.7.2'
  }.each do |gem_name, requirement|
    spec.add_dependency(gem_name, requirement)
  end
end
metadata ADDED
@@ -0,0 +1,123 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: meat_sauce
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - y.fujii
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-09-11 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: thor
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: 0.17.0
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: 0.17.0
30
+ - !ruby/object:Gem::Dependency
31
+ name: selenium-webdriver
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ~>
36
+ - !ruby/object:Gem::Version
37
+ version: 2.35.1
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: 2.35.1
46
+ - !ruby/object:Gem::Dependency
47
+ name: nokogiri
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ~>
52
+ - !ruby/object:Gem::Version
53
+ version: 1.6.0
54
+ type: :runtime
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: 1.6.0
62
+ - !ruby/object:Gem::Dependency
63
+ name: anemone
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ~>
68
+ - !ruby/object:Gem::Version
69
+ version: 0.7.2
70
+ type: :runtime
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ~>
76
+ - !ruby/object:Gem::Version
77
+ version: 0.7.2
78
+ description: ページをスクレイピングしたり、スクリーンショットを撮ったり(scraping web page, screen shot web page
79
+ .... and more)
80
+ email:
81
+ - ishikurasakura@gmail.com
82
+ executables:
83
+ - meat_sauce
84
+ extensions: []
85
+ extra_rdoc_files: []
86
+ files:
87
+ - .gitignore
88
+ - Gemfile
89
+ - LICENSE.txt
90
+ - README.md
91
+ - Rakefile
92
+ - bin/meat_sauce
93
+ - lib/meat_sauce.rb
94
+ - lib/meat_sauce/cli.rb
95
+ - lib/meat_sauce/show_source.thor
96
+ - lib/meat_sauce/version.rb
97
+ - meat_sauce.gemspec
98
+ homepage: ''
99
+ licenses:
100
+ - MIT
101
+ post_install_message:
102
+ rdoc_options: []
103
+ require_paths:
104
+ - lib
105
+ required_ruby_version: !ruby/object:Gem::Requirement
106
+ none: false
107
+ requirements:
108
+ - - ! '>='
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ required_rubygems_version: !ruby/object:Gem::Requirement
112
+ none: false
113
+ requirements:
114
+ - - ! '>='
115
+ - !ruby/object:Gem::Version
116
+ version: '0'
117
+ requirements: []
118
+ rubyforge_project:
119
+ rubygems_version: 1.8.23
120
+ signing_key:
121
+ specification_version: 3
122
+ summary: web util tool
123
+ test_files: []