roro_support 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Rakefile +5 -0
- data/lib/roro_support.rb +4 -2
- data/lib/roro_support/array.rb +32 -0
- data/lib/roro_support/bash.rb +12 -4
- data/lib/roro_support/crawler.rb +106 -7
- data/lib/roro_support/git.rb +63 -60
- data/lib/roro_support/kernel.rb +9 -0
- data/lib/roro_support/log.rb +18 -0
- data/lib/roro_support/misc.rb +108 -0
- data/lib/roro_support/string_handler.rb +2 -0
- data/lib/roro_support/version.rb +1 -1
- data/lib/roro_support/watir.rb +5 -4
- data/lib/roro_support/watir/browser.rb +3 -3
- data/lib/roro_support/watir/element.rb +10 -10
- data/lib/roro_support/watir/element_collection.rb +18 -21
- data/spec/fixtures/baidu.html +440 -0
- data/spec/lib/roro_support/array_spec.rb +27 -0
- data/spec/lib/roro_support/crawler_spec.rb +112 -0
- data/spec/lib/roro_support/git_spec.rb +23 -23
- data/spec/lib/roro_support/log/roro.log +29 -0
- data/spec/lib/roro_support/log_spec.rb +24 -0
- data/spec/lib/roro_support/misc_spec.rb +57 -0
- data/spec/lib/roro_support/watir/browser_spec.rb +6 -5
- data/spec/lib/roro_support/watir/element_collection_spec.rb +9 -0
- data/spec/spec_helper.rb +6 -4
- metadata +35 -15
- data/lib/roro_support/error.rb +0 -11
- data/lib/roro_support/methods.rb +0 -21
- data/lib/roro_support/req.rb +0 -37
- data/spec/fixtures/spec.html +0 -14
- data/spec/fixtures/spec_dir2/req_test_list.rb +0 -1
- data/spec/lib/roro_support/req_spec.rb +0 -16
- data/spec/lib/roro_support/watir/crawler_spec.rb +0 -13
- data/spec/lib/roro_support/watir/methods_spec.rb +0 -18
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a68633f8acc5830b3436e922b89fb54b47ee2ee0
|
4
|
+
data.tar.gz: dae684e89b814a0fbaec2c568d604ef2f8dbd419
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 52d338705821ca0ed22e6a140077daa9c2ab55882c2203e18731c73eca444fd8ac328e131f511fdb577e2c93b40d7d34c0b756b033176b92dbf7a29d2310115b
|
7
|
+
data.tar.gz: 3cf701805afd32b4190f1ce64dc327c39e10a295934627a9892c8df54171a13f36ce154d6404cb5ca01af4decdb245512511640d221b21c6b3f6a4962999ac91
|
data/Rakefile
CHANGED
data/lib/roro_support.rb
CHANGED
@@ -1,11 +1,13 @@
|
|
1
1
|
require 'active_support'
|
2
2
|
require 'thor'
|
3
|
+
require 'nokogiri'
|
3
4
|
$LOAD_PATH <<
|
4
5
|
File.expand_path('../roro_support', __FILE__)
|
5
|
-
require '
|
6
|
+
require 'misc'
|
6
7
|
require 'watir'
|
7
8
|
require 'crawler'
|
9
|
+
require 'kernel'
|
10
|
+
require 'bash'
|
8
11
|
# require rest
|
9
|
-
require 'req'
|
10
12
|
RoRoSupport::Req.all_files_in File.expand_path('../roro_support', __FILE__)
|
11
13
|
include RoRoSupport
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module RoRoSupport
  # Convenience helpers patched directly onto the core Array class.
  module ArrayDSL
    Array.class_eval do
      # Renders the elements as a comma-separated argument list.
      #
      # String elements have every single and double quote removed and are
      # then wrapped in single quotes; any other element is rendered with
      # +to_s+. There is no trailing comma and an empty array yields "".
      #
      # @return [String]
      def to_args
        # Joining (instead of appending "," and stripping it with /,$/) keeps
        # commas that sit right before a newline inside an element intact.
        map { |arg| arg.is_a?(String) ? "'#{preprocess(arg)}'" : arg.to_s }.join(',')
      end

      # Returns a copy of the array in which every element whose +to_s+
      # matches +regexp+ is replaced by the block's result for that element.
      # Non-matching elements are kept as they are; the receiver is untouched.
      #
      # @param regexp [Regexp] pattern tested against each element's +to_s+
      # @yield [element] called for each matching element
      # @return [Array]
      def map_with_regexp(regexp)
        map { |element| element.to_s[regexp] ? yield(element) : element }
      end

      private

      # Removes every single and double quote character from +str+.
      def preprocess(str)
        str.gsub(/\'|\"/, '')
      end
    end
  end
end
|
data/lib/roro_support/bash.rb
CHANGED
@@ -1,10 +1,18 @@
|
|
1
1
|
module RoRoSupport
|
2
2
|
module Bash
|
3
|
-
|
3
|
+
# Runs +command+ through the shell (backticks), removes every substring
# matching /.+\_.+\.rb/ from the captured output, prints the remainder with
# rr_print and returns it.
#
# @param command [String] shell command line to execute
# @return [String] the filtered output
def bash(command)
  output = `#{command}`.gsub(/.+\_.+\.rb/, '')
  rr_print output
  output
end
|
4
11
|
|
5
|
-
def
|
6
|
-
|
7
|
-
|
12
|
+
# Runs several shell commands as one "&&"-chained command line via #bash.
#
# +commands+ may be an array of command strings, a single command string or
# nil; it is normalised with Kernel#Array. Nothing is executed (and nil is
# returned) when there are no commands.
#
# @param commands [Array<String>, String, nil]
# @return whatever #bash returns, or nil when there is nothing to run
def bashes(commands=[])
  commands = Array(commands)
  return if commands.empty?

  bash(commands.join(" && "))
end
|
9
17
|
|
10
18
|
module ClassMethods
|
data/lib/roro_support/crawler.rb
CHANGED
@@ -1,14 +1,113 @@
|
|
1
|
+
include RoRoSupport::WatirDSL
|
1
2
|
module RoRoSupport
|
2
3
|
module Crawler
|
3
|
-
|
4
|
-
|
5
|
-
|
4
|
+
|
5
|
+
|
6
|
+
# Creates a Chrome Watir browser.
#
# When local? is falsy a Headless display is started first. When local? is
# truthy and proxy?(8087) reports true, Chrome is launched with the
# --proxy-server=http://127.0.0.1:8087 switch.
#
# @return [Watir::Browser]
def crawler
  unless local?
    Headless.new.start
    return ::Watir::Browser.new :chrome
  end

  return ::Watir::Browser.new :chrome unless proxy?(8087)

  ::Watir::Browser.new :chrome, :switches => %w[--proxy-server=http://127.0.0.1:8087]
end
|
18
|
+
|
19
|
+
# Loads a page and parses it with Nokogiri::HTML.
#
# When +url+ contains "http" the page is fetched through the memoised
# browser (@b, created on first use by #crawler); otherwise +url+ is treated
# as a path and read from disk. The browser is only started when it is
# actually needed, so parsing a local file no longer launches Chrome.
#
# @param url [String] a URL containing "http", or a local file path
# @return the document returned by Nokogiri::HTML.parse
def get_html_from(url)
  source =
    if url[/http/]
      @b ||= crawler
      @b.goto(url).html
    else
      File.read(url)
    end

  Nokogiri::HTML.parse source
end
|
33
|
+
|
34
|
+
def get_tags_from(url, selector)
|
35
|
+
html = get_html_from(url)
|
36
|
+
result = html.css(selector)
|
37
|
+
raise "#{selector} can't find result" if result.nil?
|
38
|
+
if result.respond_to?(:inner_html)
|
39
|
+
tag = result
|
40
|
+
elsif result.is_a?(Nokogiri::XML::NodeSet)
|
41
|
+
tags = []
|
42
|
+
result.each do |tag|
|
43
|
+
tags << tag
|
44
|
+
end
|
45
|
+
tags
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
# Collects attribute values from each tag.
#
# For every tag, each name in +attr_names+ is resolved as follows:
# 'text' reads +tag.text+, 'inner_html' reads +tag.inner_html+, anything else
# reads +tag.attribute(name).value+.
#
# Per-tag value:
# * exactly one attribute name  - a String with all single and double quotes
#   removed;
# * several attribute names     - an Array of the raw values, in order;
# * no attribute names          - an empty Array.
#
# Return value:
# * no tags   - nil;
# * one tag   - that tag's value on its own (not wrapped in an Array);
# * otherwise - an Array with one value per tag.
#
# These shapes match what the earlier implementation produced by building a
# Ruby expression from the values and eval-ing it. The values are now
# assembled directly, so page content containing backslashes (or crafted
# quoting) can neither break parsing nor be executed as code.
#
# @param tags [Enumerable] nodes responding to text/inner_html/attribute
# @param attr_names [Array<String>]
# @return [nil, String, Array]
def get_attrs_in(tags, *attr_names)
  read_attr = lambda do |tag, attr_name|
    case attr_name
    when 'text' then tag.text
    when 'inner_html' then tag.inner_html
    else tag.attribute(attr_name).value
    end
  end

  values = tags.map do |tag|
    per_tag = attr_names.map { |attr_name| read_attr.call(tag, attr_name) }
    # A single attribute collapses to a quote-free string.
    attr_names.length == 1 ? per_tag.join.delete(%q('")) : per_tag
  end

  case values.length
  when 0 then nil
  when 1 then values.first
  else values
  end
end
|
77
|
+
|
78
|
+
# Shortcut that selects tags from +url+ with #get_tags_from and passes them,
# together with +attr_names+, to #get_attrs_in.
#
# @param url [String]
# @param tags_selector [String] CSS selector
# @param attr_names [Array<String>]
# @return whatever #get_attrs_in returns
def get_tags_attrs_from(url, tags_selector, *attr_names)
  get_attrs_in(get_tags_from(url, tags_selector), *attr_names)
end
|
82
|
+
|
83
|
+
# Closes the memoised browser (@b) when it responds to +close+; otherwise
# does nothing and returns nil.
def browser_close
  return unless @b.respond_to?(:close)

  @b.close
end
|
86
|
+
|
87
|
+
# Yields each element of +results+ to the block when +results+ is an Array.
# Anything else is ignored.
#
# @param results [Array, Object]
# @yield [result] each element in order
# @return [Array, nil] the array itself, or nil for non-array input
def handle(results)
  return unless results.is_a?(Array)

  results.each { |result| yield result }
end
|
94
|
+
|
95
|
+
# Extracts the scheme-and-host part ("home" URL) from +url+.
#
# Only hosts ending in one of the TLDs com, org, info, me, net or cn are
# recognised. The match is confined to the host: it stops at the first
# "/", "?", "#", ":" or whitespace, and the TLD must be the complete last
# label, so a path such as "/page.net" or a label such as ".company" can no
# longer be mistaken for the end of the host.
#
# @param url [String]
# @return [String, nil] e.g. "http://www.example.com", or nil when no
#   supported http(s) host is found
def get_home_url(url)
  url[%r{https?://(?:www\.)?[^/?#:\s]+\.(?:com|org|info|me|net|cn)(?=[/?#:\s]|\z)}]
end
|
98
|
+
|
99
|
+
# Recovery hook for incidental failures (e.g. timeouts): yields the current
# browser (@b) to the block so the caller can act on it. Does nothing and
# returns nil when no browser has been created or no block is given.
#
# @yield [browser] the memoised browser
# @return the block's result, or nil
def handle_accident_error
  return unless @b && block_given?

  yield @b
end
|
103
|
+
|
104
|
+
private
|
105
|
+
# Reports whether `lsof -i:<port>` prints anything, i.e. whether lsof lists
# at least one entry for +port+.
#
# @param port [Integer, String]
# @return [Boolean]
def proxy?(port)
  listing = `lsof -i:#{port}`
  !listing.empty?
end
|
7
108
|
|
8
|
-
def
|
9
|
-
|
10
|
-
b.goto path
|
11
|
-
Format.utf8 b.html
|
109
|
+
# Tells whether this source file lives under "/home/zxr/ruby_project".
#
# NOTE(review): the path is hard-coded to one developer's machine, so this
# is falsy everywhere else — consider making it configurable.
#
# @return [MatchData, nil] truthy when the path of this file matches
def local?
  Regexp.new("/home/zxr/ruby_project").match(__FILE__)
end
|
13
112
|
end
|
14
113
|
end
|
data/lib/roro_support/git.rb
CHANGED
@@ -1,61 +1,64 @@
|
|
1
|
-
|
1
|
+
# deprecated
|
2
|
+
#require 'grit'
|
3
|
+
#
|
4
|
+
#module RoRoSupport
|
5
|
+
# module Git
|
6
|
+
|
2
7
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
end
|
61
|
-
end
|
8
|
+
#def git(bash, repo_path=Dir.pwd)
|
9
|
+
# repo = Grit::Repo.new repo_path
|
10
|
+
# ::Grit::Git.git_timeout = 30
|
11
|
+
# args = handle(bash)
|
12
|
+
# command = args.shift
|
13
|
+
# result = eval(bash_format(command, args))
|
14
|
+
# print result.join if result.respond_to?(:join)
|
15
|
+
#end
|
16
|
+
#
|
17
|
+
#def git_commit_all(msg)
|
18
|
+
# repo = Grit::Repo.new Dir.pwd
|
19
|
+
# repo.commit_all(msg)
|
20
|
+
#end
|
21
|
+
#
|
22
|
+
#private
|
23
|
+
#def bash_format(command, args)
|
24
|
+
# git_str = "repo.git.native '#{command}', {process_info: true}"
|
25
|
+
# if args.respond_to?(:each)
|
26
|
+
# args.each do |arg|
|
27
|
+
# git_str += ", '#{arg}'"
|
28
|
+
# end
|
29
|
+
# else
|
30
|
+
# arg = args
|
31
|
+
# git_str += ", '#{arg}'"
|
32
|
+
# end
|
33
|
+
#
|
34
|
+
# git_str
|
35
|
+
#end
|
36
|
+
#
|
37
|
+
#def handle(bash)
|
38
|
+
# start_idx, end_idx = nil
|
39
|
+
# args = bash.split(' ')
|
40
|
+
# args.each_with_index do |e, idx|
|
41
|
+
# if e[/'|"/]
|
42
|
+
# if start_idx.nil?
|
43
|
+
# start_idx = idx
|
44
|
+
# else
|
45
|
+
# end_idx = idx
|
46
|
+
# break
|
47
|
+
# end
|
48
|
+
# end
|
49
|
+
# end
|
50
|
+
#
|
51
|
+
# if start_idx && end_idx && start_idx != end_idx
|
52
|
+
# msg = args[start_idx..end_idx]
|
53
|
+
# front = args - msg
|
54
|
+
# msg = msg.join(' ')
|
55
|
+
# args = front + [msg]
|
56
|
+
# end
|
57
|
+
#
|
58
|
+
# args
|
59
|
+
#end
|
60
|
+
#
|
61
|
+
#def add_arg(git_str, arg)
|
62
|
+
#end
|
63
|
+
# end
|
64
|
+
#end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'logger'
|
2
|
+
module RoRoSupport
  # Minimal logging helper built on the standard Logger.
  module Log
    # Writes every item of +contents+ at FATAL level to STDOUT and, when
    # Rails and Rails.root are defined, also to "<Rails.root>/log/<log_file>".
    # Afterwards, if +log_caller+ is an Array (for example a backtrace), its
    # entries are printed joined by newlines.
    #
    # One logger per destination is created for the whole call and the file
    # logger is closed when done, instead of opening a new, never-closed
    # Logger for every single item. Nothing is opened when +contents+ is
    # empty.
    #
    # @param log_file [String, nil] log file name; defaults to "roro.log"
    # @param log_caller [Array<String>, nil] lines to print after logging
    # @param contents [Array<Object>] messages to log
    # @return [nil]
    def log_put(log_file, log_caller=nil, *contents)
      log_file ||= "roro.log"

      unless contents.empty?
        file_logger = nil
        if defined?(Rails) && defined?(Rails.root)
          file_logger = Logger.new("#{Rails.root}/log/#{log_file}")
        end
        stdout_logger = Logger.new(STDOUT)

        begin
          contents.each do |content|
            file_logger.fatal(content) if file_logger
            stdout_logger.fatal(content)
          end
        ensure
          # Release the file handle; STDOUT is deliberately left open.
          file_logger.close if file_logger
        end
      end

      print log_caller.join("\n") if log_caller.is_a?(Array)
    end
  end
end
|
@@ -0,0 +1,108 @@
|
|
1
|
+
module RoRoSupport
|
2
|
+
module Misc
|
3
|
+
# Namespace for the global ("dollar") variables shared by the helpers.
module DollarVars
  # Separator signs, stored as regexp source fragments.
  module RegVars
    # Global list of separator fragments; the '.' entry is pre-escaped.
    $signs4regexp = [' ,', '\\.', '、']

    # Builds a regexp-source alternation group from $signs4regexp.
    #
    # @return [String] the fragments joined with "|" inside parentheses
    def self.or
      alternation = $signs4regexp.join('|')
      "(#{alternation})"
    end
  end
end
|
16
|
+
|
17
|
+
# Splits a string at every position where a sign pattern starts and
# reassembles the selected pieces.
class StringHandler
  # Splits +str+ in front of every match of +sign+, keeps the segments that
  # match +filter+, optionally transforms each kept segment with the block,
  # and joins the outcome into one string.
  #
  # @param str [String] text to process
  # @param sign [Regexp] marks where each segment begins
  # @param filter [Regexp, String, nil] segments must match it to be kept;
  #   defaults to the look-ahead form of +sign+
  # @yield [segment] optional transform; a nil/false result drops the segment
  # @return [String]
  # @raise [RuntimeError] when +sign+ is not a Regexp
  def collect(str, sign, filter = nil, &blk)
    collect_sign(str, sign, filter, &blk).join
  end

  private

  # Does the work for #collect and returns the kept segments as an Array.
  def collect_sign(str, sign, filter = nil, &blk)
    raise 'sign must be a regexp' if !sign.is_a?(Regexp)

    # Zero-width look-ahead keeps the sign at the start of its segment.
    # The original options (e.g. /i, /m) are carried over; rebuilding from
    # +source+ alone would silently drop them.
    boundary = Regexp.new("(?=#{sign.source})", sign.options)
    filter ||= boundary
    kept = []

    str.split(boundary).each do |segment|
      next unless segment[filter]

      segment = yield(segment) if block_given?
      kept << segment if segment
    end

    kept
  end
end
|
44
|
+
|
45
|
+
|
46
|
+
# Sets one instance variable on +klass+ per entry of +hash+: key +name+
# becomes @name with the associated value.
#
# @param klass [Object] the object that receives the instance variables
# @param hash [Hash] variable names (without "@") mapped to values
# @return [Hash] the +hash+ that was passed in
def set_instance_variable_from(klass, hash = {})
  hash.each_pair do |name, value|
    klass.instance_variable_set(:"@#{name}", value)
  end
end
|
52
|
+
|
53
|
+
# String formatting helpers for URLs and HTML.
class Format
  class << self
    # Normalises +url+ into something a browser can open.
    #
    # A value that already contains "http" is returned unchanged. Otherwise
    # it is prefixed with "file://" when options[:local] is truthy, or with
    # "http://" when it is not.
    #
    # @param url [String]
    # @param options [Hash] :local - treat +url+ as a local file path
    # @return [String]
    def url(url, options = {})
      return url if url[/http/]

      scheme = options[:local] ? 'file' : 'http'
      "#{scheme}://#{url}"
    end

    # Rewrites every single-quoted charset declaration (charset='...') in
    # +html+ to charset='utf-8'. The string is modified in place and
    # returned.
    #
    # The value is matched up to its own closing quote ([^']+) so that other
    # quoted attributes later on the same line are left untouched.
    #
    # @param html [String] mutable HTML source
    # @return [String] the same +html+ object
    def utf8(html)
      html.gsub!(/charset='[^']+'/, "charset='utf-8'")
      html
    end
  end
end
|
70
|
+
|
71
|
+
|
72
|
+
# Indexes and requires files found in directories.
class Req
  # Hash of indexed files: base name up to the first "." (as a Symbol)
  # mapped to the path found by #that.
  attr_accessor :files

  def initialize
    @files = {}
  end

  # Indexes everything below +dir+ into #files (see #dir_load).
  #
  # @param dir [String] directory to scan
  # @return [Req] self, so calls can be chained
  def that(dir)
    dir_load(dir)
    self
  end

  # Requires every entry matched by "<dirname>/**", where the glob is
  # resolved relative to the parent of +dir_path+ and +dirname+ is the last
  # path segment of +dir_path+ with ".rb" removed.
  #
  # @param dir_path [String]
  # @return [Array<String>] the matched paths
  def self.all_files_in(dir_path)
    dirname = dir_path.split('/').last.gsub(/\.rb/, '')
    pattern = File.expand_path("../#{dirname}/**", dir_path)
    Dir[pattern].each { |file| require file }
  end

  private

  # Walks +dir_name+ recursively. An entry whose own "/**" glob matches at
  # least one path is treated as a directory: it is appended to $LOAD_PATH
  # and scanned in turn. Every other entry is recorded in @files.
  def dir_load(dir_name)
    Dir["#{dir_name}/**"].each do |entry|
      if Dir["#{entry}/**"].empty?
        key = File.basename(entry).gsub(/\..+/, '').to_sym
        @files[key] = entry
      else
        $LOAD_PATH << entry
        dir_load(entry)
      end
    end
  end
end
|
107
|
+
end
|
108
|
+
end
|