roro_support 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Rakefile +5 -0
- data/lib/roro_support.rb +4 -2
- data/lib/roro_support/array.rb +32 -0
- data/lib/roro_support/bash.rb +12 -4
- data/lib/roro_support/crawler.rb +106 -7
- data/lib/roro_support/git.rb +63 -60
- data/lib/roro_support/kernel.rb +9 -0
- data/lib/roro_support/log.rb +18 -0
- data/lib/roro_support/misc.rb +108 -0
- data/lib/roro_support/string_handler.rb +2 -0
- data/lib/roro_support/version.rb +1 -1
- data/lib/roro_support/watir.rb +5 -4
- data/lib/roro_support/watir/browser.rb +3 -3
- data/lib/roro_support/watir/element.rb +10 -10
- data/lib/roro_support/watir/element_collection.rb +18 -21
- data/spec/fixtures/baidu.html +440 -0
- data/spec/lib/roro_support/array_spec.rb +27 -0
- data/spec/lib/roro_support/crawler_spec.rb +112 -0
- data/spec/lib/roro_support/git_spec.rb +23 -23
- data/spec/lib/roro_support/log/roro.log +29 -0
- data/spec/lib/roro_support/log_spec.rb +24 -0
- data/spec/lib/roro_support/misc_spec.rb +57 -0
- data/spec/lib/roro_support/watir/browser_spec.rb +6 -5
- data/spec/lib/roro_support/watir/element_collection_spec.rb +9 -0
- data/spec/spec_helper.rb +6 -4
- metadata +35 -15
- data/lib/roro_support/error.rb +0 -11
- data/lib/roro_support/methods.rb +0 -21
- data/lib/roro_support/req.rb +0 -37
- data/spec/fixtures/spec.html +0 -14
- data/spec/fixtures/spec_dir2/req_test_list.rb +0 -1
- data/spec/lib/roro_support/req_spec.rb +0 -16
- data/spec/lib/roro_support/watir/crawler_spec.rb +0 -13
- data/spec/lib/roro_support/watir/methods_spec.rb +0 -18
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a68633f8acc5830b3436e922b89fb54b47ee2ee0
|
4
|
+
data.tar.gz: dae684e89b814a0fbaec2c568d604ef2f8dbd419
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 52d338705821ca0ed22e6a140077daa9c2ab55882c2203e18731c73eca444fd8ac328e131f511fdb577e2c93b40d7d34c0b756b033176b92dbf7a29d2310115b
|
7
|
+
data.tar.gz: 3cf701805afd32b4190f1ce64dc327c39e10a295934627a9892c8df54171a13f36ce154d6404cb5ca01af4decdb245512511640d221b21c6b3f6a4962999ac91
|
data/Rakefile
CHANGED
data/lib/roro_support.rb
CHANGED
@@ -1,11 +1,13 @@
|
|
1
1
|
require 'active_support'
|
2
2
|
require 'thor'
|
3
|
+
require 'nokogiri'
|
3
4
|
$LOAD_PATH <<
|
4
5
|
File.expand_path('../roro_support', __FILE__)
|
5
|
-
require '
|
6
|
+
require 'misc'
|
6
7
|
require 'watir'
|
7
8
|
require 'crawler'
|
9
|
+
require 'kernel'
|
10
|
+
require 'bash'
|
8
11
|
# require rest
|
9
|
-
require 'req'
|
10
12
|
RoRoSupport::Req.all_files_in File.expand_path('../roro_support', __FILE__)
|
11
13
|
include RoRoSupport
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module RoRoSupport
  module ArrayDSL
    # Reopens ::Array to add helpers for rendering an array as an argument
    # list and for selectively transforming elements.
    Array.class_eval do
      # Renders the array as a comma-separated argument string.
      #
      # String elements have every single and double quote removed and are
      # then wrapped in single quotes; any other element is rendered with
      # #to_s. An empty array yields "".
      #
      # Elements are joined rather than appending a comma and stripping it
      # with a line-anchored regexp, so commas that sit before a newline
      # inside a value are left untouched.
      def to_args
        map { |arg| arg.is_a?(String) ? "'#{preprocess(arg)}'" : arg.to_s }.join(',')
      end

      # Returns a new array in which every element whose #to_s matches
      # +regexp+ is replaced by the block's result for that element; all
      # other elements are kept as they are. The receiver is not modified.
      def map_with_regexp(regexp)
        map { |e| e.to_s[regexp] ? yield(e) : e }
      end

      # Removes every single and double quote from +str+.
      def preprocess(str)
        str.gsub(/\'|\"/, '')
      end
      private :preprocess
    end
  end
end
|
data/lib/roro_support/bash.rb
CHANGED
@@ -1,10 +1,18 @@
|
|
1
1
|
module RoRoSupport
|
2
2
|
module Bash
|
3
|
-
|
3
|
+
# Runs +command+ through the shell (backticks), removes every substring
# matching /.+\_.+\.rb/ from the captured standard output, passes the
# filtered text to rr_print and returns it.
#
# NOTE(review): +command+ is interpolated into the shell verbatim; callers
# must not pass untrusted input.
def bash(command)
  # gsub always returns a String, so no nil check is needed afterwards.
  output = `#{command}`.gsub(/.+\_.+\.rb/, '')
  rr_print output
  output
end
|
4
11
|
|
5
|
-
def
|
6
|
-
|
7
|
-
|
12
|
+
# Chains +commands+ with " && " and runs them as a single bash call.
# Returns nil without running anything when +commands+ is empty.
def bashes(commands=[])
  return if commands.empty?

  chained = commands.join(" && ")
  bash(chained)
end
|
9
17
|
|
10
18
|
module ClassMethods
|
data/lib/roro_support/crawler.rb
CHANGED
@@ -1,14 +1,113 @@
|
|
1
|
+
include RoRoSupport::WatirDSL
|
1
2
|
module RoRoSupport
|
2
3
|
module Crawler
|
3
|
-
|
4
|
-
|
5
|
-
|
4
|
+
|
5
|
+
|
6
|
+
# Builds a new Chrome Watir browser.
#
# When local? is falsy a Headless session is started first. When local? is
# truthy and proxy?(8087) reports true, Chrome is launched with the
# --proxy-server switch pointing at 127.0.0.1:8087.
def crawler
  unless local?
    Headless.new.start
    return ::Watir::Browser.new(:chrome)
  end

  return ::Watir::Browser.new(:chrome) unless proxy?(8087)

  ::Watir::Browser.new(:chrome, :switches => %w[--proxy-server=http://127.0.0.1:8087])
end
|
18
|
+
|
19
|
+
# Returns the Nokogiri-parsed document for +url+.
#
# If +url+ contains "http" the page is loaded through the browser, which is
# created once via crawler and memoized in @b. Anything else is treated as a
# path to a local file and read from disk.
#
# The browser is only created on the remote branch, so parsing a local file
# no longer launches Chrome as a side effect.
def get_html_from(url)
  return Nokogiri::HTML.parse(File.read(url)) unless url[/http/]

  @b ||= crawler
  # NOTE(review): relies on goto returning an object that responds to #html;
  # confirm against the project's Watir browser extensions.
  Nokogiri::HTML.parse(@b.goto(url).html)
end
|
33
|
+
|
34
|
+
# Loads +url+ via get_html_from and queries it with the CSS +selector+.
#
# Raises a RuntimeError when the query returns nil. If the query result
# responds to #inner_html it is returned as is; otherwise, when it is a
# Nokogiri::XML::NodeSet, its nodes are returned in a plain Array. Any other
# result yields nil.
def get_tags_from(url, selector)
  found = get_html_from(url).css(selector)
  raise "#{selector} can't find result" if found.nil?
  return found if found.respond_to?(:inner_html)
  return nil unless found.is_a?(Nokogiri::XML::NodeSet)

  nodes = []
  found.each { |node| nodes << node }
  nodes
end
|
48
|
+
|
49
|
+
# Extracts values from each tag in +tags+.
#
# Each name in +attr_names+ selects one value per tag: 'text' reads
# tag.text, 'inner_html' reads tag.inner_html, and any other name reads
# tag.attribute(name).value.
#
# Per-tag result:
# * exactly one name  -> a String (single and double quotes are removed,
#   which keeps the output of the previous implementation);
# * any other count   -> an Array of the values in +attr_names+ order.
#
# Return value (same shape as before):
# * no tags     -> nil
# * one tag     -> that tag's result, not wrapped in an Array
# * several     -> an Array with one result per tag
#
# The result used to be built by eval-ing a string assembled from the page
# content. That executed scraped text as Ruby and broke on backslashes, so
# the same shape is now produced directly.
def get_attrs_in(tags, *attr_names)
  rows = tags.map do |tag|
    values = attr_names.map do |attr_name|
      case attr_name
      when 'text' then tag.text
      when 'inner_html' then tag.inner_html
      else tag.attribute(attr_name).value
      end
    end
    attr_names.length == 1 ? values.join.gsub(/\'|\"/, '') : values
  end

  case rows.length
  when 0 then nil
  when 1 then rows.first
  else rows
  end
end
|
77
|
+
|
78
|
+
# Convenience wrapper: selects tags from +url+ with +tags_selector+ via
# get_tags_from, then extracts +attr_names+ from them via get_attrs_in.
def get_tags_attrs_from(url, tags_selector, *attr_names)
  get_attrs_in(get_tags_from(url, tags_selector), *attr_names)
end
|
82
|
+
|
83
|
+
# Closes the memoized browser in @b, if it responds to #close.
# Does nothing (and returns nil) otherwise, e.g. when no browser was opened.
def browser_close
  return unless @b.respond_to?(:close)

  @b.close
end
|
86
|
+
|
87
|
+
# Yields every element of +results+ to the block when +results+ is an Array
# and returns the array; returns nil for any other input.
def handle(results)
  return unless results.is_a?(Array)

  results.each { |item| yield item }
end
|
94
|
+
|
95
|
+
# Extracts the scheme-and-host prefix of +url+ for hosts ending in
# .com, .org, .info, .me, .net or .cn. Returns nil when +url+ contains no
# such http(s) address.
#
# The host part is restricted to non-slash characters so the match cannot
# run on into the path (previously "http://a.com/b.cn" matched in full).
def get_home_url(url)
  url[/https?:\/\/(?:www\.)?[^\/\n]+\.(?:com|org|info|me|net|cn)/]
end
|
98
|
+
|
99
|
+
# Hook for recovering from unexpected errors (e.g. timeouts): yields the
# memoized browser @b to the block and returns the block's value.
# Returns nil without yielding when no block is given or @b is not set.
def handle_accident_error
  return unless block_given?
  return unless @b

  yield @b
end
|
103
|
+
|
104
|
+
private
|
105
|
+
# Reports whether `lsof -i:<port>` prints anything, i.e. whether lsof lists
# at least one entry for +port+.
def proxy?(port)
  listing = `lsof -i:#{port}`
  listing.empty? ? false : true
end
|
7
108
|
|
8
|
-
def
|
9
|
-
|
10
|
-
b.goto path
|
11
|
-
Format.utf8 b.html
|
109
|
+
# Matches this file's path against "/home/zxr/ruby_project".
# Returns the MatchData when the path contains it, nil otherwise.
# NOTE(review): the path is hard-coded to one developer machine.
def local?
  project_root = Regexp.new("/home/zxr/ruby_project")
  project_root.match(__FILE__)
end
|
13
112
|
end
|
14
113
|
end
|
data/lib/roro_support/git.rb
CHANGED
@@ -1,61 +1,64 @@
|
|
1
|
-
|
1
|
+
# deprecated
|
2
|
+
#require 'grit'
|
3
|
+
#
|
4
|
+
#module RoRoSupport
|
5
|
+
# module Git
|
6
|
+
|
2
7
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
end
|
61
|
-
end
|
8
|
+
#def git(bash, repo_path=Dir.pwd)
|
9
|
+
# repo = Grit::Repo.new repo_path
|
10
|
+
# ::Grit::Git.git_timeout = 30
|
11
|
+
# args = handle(bash)
|
12
|
+
# command = args.shift
|
13
|
+
# result = eval(bash_format(command, args))
|
14
|
+
# print result.join if result.respond_to?(:join)
|
15
|
+
#end
|
16
|
+
#
|
17
|
+
#def git_commit_all(msg)
|
18
|
+
# repo = Grit::Repo.new Dir.pwd
|
19
|
+
# repo.commit_all(msg)
|
20
|
+
#end
|
21
|
+
#
|
22
|
+
#private
|
23
|
+
#def bash_format(command, args)
|
24
|
+
# git_str = "repo.git.native '#{command}', {process_info: true}"
|
25
|
+
# if args.respond_to?(:each)
|
26
|
+
# args.each do |arg|
|
27
|
+
# git_str += ", '#{arg}'"
|
28
|
+
# end
|
29
|
+
# else
|
30
|
+
# arg = args
|
31
|
+
# git_str += ", '#{arg}'"
|
32
|
+
# end
|
33
|
+
#
|
34
|
+
# git_str
|
35
|
+
#end
|
36
|
+
#
|
37
|
+
#def handle(bash)
|
38
|
+
# start_idx, end_idx = nil
|
39
|
+
# args = bash.split(' ')
|
40
|
+
# args.each_with_index do |e, idx|
|
41
|
+
# if e[/'|"/]
|
42
|
+
# if start_idx.nil?
|
43
|
+
# start_idx = idx
|
44
|
+
# else
|
45
|
+
# end_idx = idx
|
46
|
+
# break
|
47
|
+
# end
|
48
|
+
# end
|
49
|
+
# end
|
50
|
+
#
|
51
|
+
# if start_idx && end_idx && start_idx != end_idx
|
52
|
+
# msg = args[start_idx..end_idx]
|
53
|
+
# front = args - msg
|
54
|
+
# msg = msg.join(' ')
|
55
|
+
# args = front + [msg]
|
56
|
+
# end
|
57
|
+
#
|
58
|
+
# args
|
59
|
+
#end
|
60
|
+
#
|
61
|
+
#def add_arg(git_str, arg)
|
62
|
+
#end
|
63
|
+
# end
|
64
|
+
#end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'logger'
|
2
|
+
module RoRoSupport
|
3
|
+
module Log
|
4
|
+
# Logs every entry of +contents+ at FATAL level.
#
# Each entry is written to STDOUT and, when Rails and Rails.root are
# defined, first to "#{Rails.root}/log/<log_file>". Afterwards, if
# +log_caller+ is an Array (e.g. a backtrace), its entries are printed
# joined by newlines.
#
# log_file   - log file name inside Rails' log directory; nil means
#              "roro.log".
# log_caller - optional Array of lines to print after logging.
# contents   - messages to log.
#
# The loggers are created once per call instead of once per message, and
# the file logger is closed when done so no file handle is left open.
def log_put(log_file, log_caller=nil, *contents)
  log_file ||= "roro.log"

  unless contents.empty?
    file_logger = nil
    if defined?(Rails) && defined?(Rails.root)
      file_logger = Logger.new("#{Rails.root}/log/#{log_file}")
    end
    stdout_logger = Logger.new(STDOUT)

    begin
      contents.each do |content|
        file_logger.fatal(content) if file_logger
        stdout_logger.fatal(content)
      end
    ensure
      # STDOUT is left open on purpose; only the log file is released.
      file_logger.close if file_logger
    end
  end

  print log_caller.join("\n") if log_caller.is_a?(Array)
end
|
17
|
+
end
|
18
|
+
end
|
@@ -0,0 +1,108 @@
|
|
1
|
+
module RoRoSupport
|
2
|
+
module Misc
|
3
|
+
module DollarVars
  module RegVars
    # Separator patterns (regexp source fragments), stored in a global.
    $signs4regexp = [' ,', '\\.', '、']

    class << self
      # Returns the separators as one parenthesised regexp alternation
      # string, built from the current value of $signs4regexp.
      def or
        alternation = $signs4regexp.join('|')
        "(#{alternation})"
      end
    end
  end
end
|
16
|
+
|
17
|
+
# Splits a string in front of every occurrence of a separator pattern and
# reassembles the pieces that pass a filter.
class StringHandler
  # Splits +str+ before each match of +sign+, keeps the pieces matching
  # +filter+ and returns them joined into one String.
  #
  # str    - the String to process.
  # sign   - Regexp marking where a new piece starts; the matched text stays
  #          at the beginning of its piece.
  # filter - optional Regexp a piece must match to be kept. Defaults to the
  #          look-ahead form of +sign+, i.e. the piece must contain a match
  #          of +sign+.
  # blk    - optional block called with each kept piece; its return value
  #          replaces the piece, and a nil/false result drops it.
  #
  # Raises RuntimeError when +sign+ is not a Regexp.
  def collect(str, sign, filter = nil, &blk)
    collect_sign(str, sign, filter, &blk).join
  end

  private

  # Flags carried over from +sign+ when the look-ahead pattern is built.
  SIGN_FLAGS = Regexp::IGNORECASE | Regexp::EXTENDED | Regexp::MULTILINE

  # Returns the Array of kept (and possibly transformed) pieces; see
  # #collect for the parameters.
  def collect_sign(str, sign, filter = nil, &blk)
    raise 'sign must be a regexp' unless sign.is_a?(Regexp)

    # A look-ahead splits in front of the separator without consuming it.
    # The flags of +sign+ (e.g. /i) are carried over; rebuilding from
    # #source alone used to drop them.
    boundary = Regexp.new("(?=#{sign.source})", sign.options & SIGN_FLAGS)
    filter ||= boundary

    str.split(boundary).each_with_object([]) do |piece, kept|
      next unless piece[filter]

      piece = yield(piece) if block_given?
      kept << piece if piece
    end
  end
end
|
44
|
+
|
45
|
+
|
46
|
+
# Sets one instance variable on +klass+ per entry of +hash+: the key (with
# an "@" prefix) is the variable name, the value its new value.
# Returns +hash+.
def set_instance_variable_from(klass, hash = {})
  hash.each_pair do |name, val|
    klass.instance_variable_set(:"@#{name}", val)
  end
end
|
52
|
+
|
53
|
+
# Small string formatting helpers for URLs and HTML.
class Format
  class << self
    # Normalises +url+ into something a browser can open.
    #
    # Returns +url+ unchanged when it already contains "http". Otherwise
    # prefixes "file://" when options[:local] is truthy, or "http://" when
    # it is not.
    def url(url, options = {})
      return url if url[/http/]

      # No "http" is present at this point, so the prefix is always needed.
      options[:local] ? "file://#{url}" : "http://#{url}"
    end

    # Rewrites every single-quoted charset='...' declaration in +html+ to
    # charset='utf-8'. +html+ is modified in place and returned.
    #
    # The value is matched up to its closing quote only, so other quoted
    # attributes later on the same line are no longer swallowed.
    def utf8(html)
      html.gsub!(/charset='[^']+'/, "charset='utf-8'")
      html
    end
  end
end
|
70
|
+
|
71
|
+
|
72
|
+
# Collects the files below a directory and can require every entry of a
# directory.
class Req
  # Hash mapping a file's base name (as a Symbol, everything from the first
  # dot removed) to its path.
  attr_accessor :files

  def initialize
    @files = {}
  end

  # Indexes +dir+ into #files and returns self so calls can be chained.
  def that(dir)
    dir_load(dir)
    self
  end

  # Requires every entry matched by "<dir_path>/**", where the last path
  # segment of +dir_path+ has any ".rb" removed first.
  def self.all_files_in(dir_path)
    dirname = dir_path.split('/').last.gsub(/\.rb/, '')
    pattern = File.expand_path("../#{dirname}/**", dir_path)
    Dir[pattern].each { |file| require file }
  end

  private

  # Walks +dir_name+: an entry that itself has entries is appended to
  # $LOAD_PATH and walked recursively; any other entry is recorded in @files.
  def dir_load(dir_name)
    Dir["#{dir_name}/**"].each do |entry|
      if Dir["#{entry}/**"].empty?
        key = File.basename(entry).gsub(/\..+/, '').to_sym
        @files[key] = entry
      else
        $LOAD_PATH << entry
        dir_load(entry)
      end
    end
  end
end
|
107
|
+
end
|
108
|
+
end
|