searchyj 0.5.2 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.ja.md +2 -2
- data/README.md +2 -2
- data/TODO.md +8 -0
- data/lib/searchyj/cli.rb +16 -16
- data/lib/searchyj/main.rb +15 -14
- data/lib/searchyj/page_size_adjuster.rb +67 -0
- data/lib/searchyj/searcher.rb +16 -6
- data/lib/searchyj/version.rb +1 -1
- data/lib/searchyj.rb +2 -2
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7dd43ee6cf14a41b52906a501cbcb0035e32cb5c
|
4
|
+
data.tar.gz: 407ba21943c025b50d2882b4f709aaa14fbba083
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5ac674566cb5abbffb71ba270a7cb75c260b14136d881219eaa1a11a6edc0b4b96b12b2e629fec105d517f4158b31817fee8b273782ed59ed8fb4a421011596f
|
7
|
+
data.tar.gz: 0240303f9607f2b3c2697fa988761e8a5d41cec2290fcab0532a3995c441e48a1440e0848fc1f36db971df879bfe660d3b61d836f63d7d1412d98663eb17056f
|
data/README.ja.md
CHANGED
data/README.md
CHANGED
@@ -83,11 +83,11 @@ The key name for comparing values. You can pass any of 'title' or 'uri'.
|
|
83
83
|
|
84
84
|
The default value is 'title'.
|
85
85
|
|
86
|
-
### at_rank
|
86
|
+
### rank
|
87
87
|
|
88
88
|
Print a record of the search result at a particular rank order in the search ranking.
|
89
89
|
|
90
|
-
$ searchyj at_rank [options] <SearchTerm>
|
90
|
+
$ searchyj rank [options] <SearchTerm>
|
91
91
|
|
92
92
|
If the search returned no results or no record was found at that rank, the string `null` is printed.
|
93
93
|
|
data/TODO.md
ADDED
data/lib/searchyj/cli.rb
CHANGED
@@ -4,22 +4,6 @@ require 'json'
|
|
4
4
|
|
5
5
|
module SearchYJ
|
6
6
|
class CLI < Thor
|
7
|
-
desc 'at_rank',
|
8
|
-
"Get a record in the search result\n" \
|
9
|
-
'at a particular rank order in the search ranking.'
|
10
|
-
option :rank,
|
11
|
-
type: :numeric,
|
12
|
-
required: true,
|
13
|
-
aliases: '-r',
|
14
|
-
desc: 'The rank order in the search ranking'
|
15
|
-
def at_rank(term)
|
16
|
-
rank = options[:rank]
|
17
|
-
|
18
|
-
puts JSON.dump(
|
19
|
-
result = SearchYJ.at_rank(term, rank)
|
20
|
-
)
|
21
|
-
end
|
22
|
-
|
23
7
|
desc 'detect',
|
24
8
|
"Detect a first record that\n" \
|
25
9
|
'meet the conditions of a regexp and a key.'
|
@@ -65,5 +49,21 @@ module SearchYJ
|
|
65
49
|
SearchYJ.list(term, size, from)
|
66
50
|
)
|
67
51
|
end
|
52
|
+
|
53
|
+
desc 'rank',
|
54
|
+
"Get a record in the search result\n" \
|
55
|
+
'at a particular rank order in the search ranking.'
|
56
|
+
option :rank,
|
57
|
+
type: :numeric,
|
58
|
+
required: true,
|
59
|
+
aliases: '-r',
|
60
|
+
desc: 'The rank order in the search ranking'
|
61
|
+
def rank(term)
|
62
|
+
rank = options[:rank]
|
63
|
+
|
64
|
+
puts JSON.dump(
|
65
|
+
SearchYJ.rank(term, rank)
|
66
|
+
)
|
67
|
+
end
|
68
68
|
end
|
69
69
|
end
|
data/lib/searchyj/main.rb
CHANGED
@@ -2,19 +2,6 @@ require 'searchyj/searcher'
|
|
2
2
|
|
3
3
|
module SearchYJ
|
4
4
|
class Main
|
5
|
-
# Get a record in the search result
|
6
|
-
# at a particular rank order in the search ranking.
|
7
|
-
# @param term [String] Search term
|
8
|
-
# @param rank [Integer] The rank order in the search ranking
|
9
|
-
#
|
10
|
-
# @return [Hash]
|
11
|
-
# A result record if matched the arguments
|
12
|
-
# Else nil
|
13
|
-
def at_rank(term, rank)
|
14
|
-
result = list(term, 1, rank)
|
15
|
-
(result.size > 0) ? result[0] : nil
|
16
|
-
end
|
17
|
-
|
18
5
|
# Detect a first record that
|
19
6
|
# meet the conditions of a regexp and a key.
|
20
7
|
# @param term [String] Search term
|
@@ -29,6 +16,7 @@ module SearchYJ
|
|
29
16
|
|
30
17
|
searcher = Searcher.new
|
31
18
|
searcher.uri.search_term = term
|
19
|
+
searcher.pager.size = 100
|
32
20
|
|
33
21
|
searcher.run do |record|
|
34
22
|
if regexp.match(record[key])
|
@@ -51,7 +39,7 @@ module SearchYJ
|
|
51
39
|
searcher = Searcher.new
|
52
40
|
searcher.uri.search_term = term
|
53
41
|
searcher.uri.index = start_index
|
54
|
-
searcher.page_size = size
|
42
|
+
searcher.pager.size = size
|
55
43
|
list = []
|
56
44
|
|
57
45
|
searcher.run do |record|
|
@@ -61,5 +49,18 @@ module SearchYJ
|
|
61
49
|
|
62
50
|
list
|
63
51
|
end
|
52
|
+
|
53
|
+
# Get a record in the search result
|
54
|
+
# at a particular rank order in the search ranking.
|
55
|
+
# @param term [String] Search term
|
56
|
+
# @param rank [Integer] The rank order in the search ranking
|
57
|
+
#
|
58
|
+
# @return [Hash]
|
59
|
+
# A result record if matched the arguments
|
60
|
+
# Else nil
|
61
|
+
def rank(term, rank)
|
62
|
+
result = list(term, 1, rank)
|
63
|
+
(result.size > 0) ? result[0] : nil
|
64
|
+
end
|
64
65
|
end
|
65
66
|
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
|
2
|
+
module SearchYJ
|
3
|
+
#
|
4
|
+
# Manage cookie for page
|
5
|
+
#
|
6
|
+
# @author [indeep-xyz]
|
7
|
+
#
|
8
|
+
class PageSizeAdjuster
|
9
|
+
attr_reader :size
|
10
|
+
|
11
|
+
SIZE_PATTERN = [10, 15, 20, 30, 40, 100]
|
12
|
+
SIZE_DEFAULT = SIZE_PATTERN[0]
|
13
|
+
COOKIE_BASE = \
|
14
|
+
'sB="n=<<n>>&nw=-1&fp_ipod=0&fp_pl=0"; ' \
|
15
|
+
'path=/; ' \
|
16
|
+
'expire=<<expire>>'
|
17
|
+
EXPIRE_DELAY = 60 * 60 * 24
|
18
|
+
|
19
|
+
# Initialize myself.
|
20
|
+
def initialize
|
21
|
+
@size = SIZE_DEFAULT
|
22
|
+
@expire = Time.now.to_i + EXPIRE_DELAY
|
23
|
+
end
|
24
|
+
|
25
|
+
def size=(size)
|
26
|
+
@size = optimize_page_size(size)
|
27
|
+
end
|
28
|
+
|
29
|
+
# Optimize the number of the page size for searching.
|
30
|
+
# @param size [Number] The number of the page size
|
31
|
+
#
|
32
|
+
# @return [Number] The optimized number
|
33
|
+
def optimize_page_size(size)
|
34
|
+
SIZE_PATTERN.reverse_each do |n|
|
35
|
+
return n if size >= n
|
36
|
+
end
|
37
|
+
|
38
|
+
SIZE_DEFAULT
|
39
|
+
end
|
40
|
+
|
41
|
+
# Attach the cookie string to the argument
|
42
|
+
# if @size has differed from the default value.
|
43
|
+
# @param hash [Hash]
|
44
|
+
#
|
45
|
+
# @return [Hash]
|
46
|
+
def attach_cookie(hash)
|
47
|
+
if @size > SIZE_DEFAULT
|
48
|
+
hash['Cookie'] = create_cookie
|
49
|
+
end
|
50
|
+
|
51
|
+
hash
|
52
|
+
end
|
53
|
+
|
54
|
+
private
|
55
|
+
|
56
|
+
# Create the cookie string to adjust the page size.
|
57
|
+
#
|
58
|
+
# @return [String] The cookie string
|
59
|
+
def create_cookie
|
60
|
+
COOKIE_BASE.gsub(
|
61
|
+
/<<[^>]+>>/,
|
62
|
+
'<<n>>' => @size.to_s,
|
63
|
+
'<<expire>>' => @expire.to_s
|
64
|
+
)
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
data/lib/searchyj/searcher.rb
CHANGED
@@ -2,6 +2,7 @@ require 'nokogiri'
|
|
2
2
|
require 'open-uri'
|
3
3
|
require 'searchyj/uri_manager'
|
4
4
|
require 'searchyj/record_sorter'
|
5
|
+
require 'searchyj/page_size_adjuster'
|
5
6
|
|
6
7
|
module SearchYJ
|
7
8
|
#
|
@@ -13,7 +14,13 @@ module SearchYJ
|
|
13
14
|
#
|
14
15
|
class Searcher
|
15
16
|
attr_reader :results
|
16
|
-
attr_accessor
|
17
|
+
attr_accessor \
|
18
|
+
:pager, :uri, \
|
19
|
+
:limit_loop, :user_agent, :sleep_time
|
20
|
+
|
21
|
+
ENCODING = 'UTF-8'
|
22
|
+
LIMIT_LOOP = 50
|
23
|
+
SLEEP_TIME = 1
|
17
24
|
USER_AGENT = \
|
18
25
|
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:38.0)' \
|
19
26
|
'Gecko/20100101 Firefox/38.0'
|
@@ -22,16 +29,17 @@ module SearchYJ
|
|
22
29
|
|
23
30
|
# Initialize myself.
|
24
31
|
def initialize
|
32
|
+
@pager = PageSizeAdjuster.new
|
25
33
|
@uri = UriManager.new
|
26
|
-
@
|
34
|
+
@encoding = ENCODING
|
35
|
+
@limit_loop = LIMIT_LOOP
|
36
|
+
@sleep_time = SLEEP_TIME
|
27
37
|
@user_agent = USER_AGENT
|
28
|
-
@sleep_time = 1
|
29
|
-
@page_size = 10
|
30
38
|
end
|
31
39
|
|
32
40
|
def run(&block)
|
33
41
|
loop_count = 0
|
34
|
-
sorter = RecordSorter.new(@uri.index, @page_size)
|
42
|
+
sorter = RecordSorter.new(@uri.index, @pager.size)
|
35
43
|
|
36
44
|
while loop_count < @limit_loop
|
37
45
|
fetch_html
|
@@ -79,6 +87,8 @@ module SearchYJ
|
|
79
87
|
'User-Agent' => @user_agent
|
80
88
|
}
|
81
89
|
|
90
|
+
params = @pager.attach_cookie(params)
|
91
|
+
|
82
92
|
open(uri, params) do |f|
|
83
93
|
fail OpenUriError unless f.status[0] == '200'
|
84
94
|
f.read
|
@@ -89,7 +99,7 @@ module SearchYJ
|
|
89
99
|
# and set the parsed HTML data to my own instance.
|
90
100
|
def fetch_html
|
91
101
|
raw_html = download_raw_html
|
92
|
-
@html = Nokogiri::HTML.parse(raw_html, nil,
|
102
|
+
@html = Nokogiri::HTML.parse(raw_html, nil, @encoding)
|
93
103
|
end
|
94
104
|
|
95
105
|
# Check whether or not the next page exists.
|
data/lib/searchyj/version.rb
CHANGED
data/lib/searchyj.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: searchyj
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.2
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- indeep-xyz
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-06-
|
11
|
+
date: 2015-06-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -96,12 +96,14 @@ files:
|
|
96
96
|
- README.ja.md
|
97
97
|
- README.md
|
98
98
|
- Rakefile
|
99
|
+
- TODO.md
|
99
100
|
- bin/console
|
100
101
|
- bin/searchyj
|
101
102
|
- bin/setup
|
102
103
|
- lib/searchyj.rb
|
103
104
|
- lib/searchyj/cli.rb
|
104
105
|
- lib/searchyj/main.rb
|
106
|
+
- lib/searchyj/page_size_adjuster.rb
|
105
107
|
- lib/searchyj/record_sorter.rb
|
106
108
|
- lib/searchyj/searcher.rb
|
107
109
|
- lib/searchyj/unique_logger.rb
|