searchyj 0.5.2 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.ja.md +2 -2
- data/README.md +2 -2
- data/TODO.md +8 -0
- data/lib/searchyj/cli.rb +16 -16
- data/lib/searchyj/main.rb +15 -14
- data/lib/searchyj/page_size_adjuster.rb +67 -0
- data/lib/searchyj/searcher.rb +16 -6
- data/lib/searchyj/version.rb +1 -1
- data/lib/searchyj.rb +2 -2
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7dd43ee6cf14a41b52906a501cbcb0035e32cb5c
|
4
|
+
data.tar.gz: 407ba21943c025b50d2882b4f709aaa14fbba083
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5ac674566cb5abbffb71ba270a7cb75c260b14136d881219eaa1a11a6edc0b4b96b12b2e629fec105d517f4158b31817fee8b273782ed59ed8fb4a421011596f
|
7
|
+
data.tar.gz: 0240303f9607f2b3c2697fa988761e8a5d41cec2290fcab0532a3995c441e48a1440e0848fc1f36db971df879bfe660d3b61d836f63d7d1412d98663eb17056f
|
data/README.ja.md
CHANGED
data/README.md
CHANGED
@@ -83,11 +83,11 @@ The key name for comparing values. You can pass any of 'title' or 'uri'.
|
|
83
83
|
|
84
84
|
The default value is 'title'.
|
85
85
|
|
86
|
-
###
|
86
|
+
### rank
|
87
87
|
|
88
88
|
Print a record of the search result at a particular rank order in the search ranking.
|
89
89
|
|
90
|
-
$ searchyj
|
90
|
+
$ searchyj rank [options] <SearchTerm>
|
91
91
|
|
92
92
|
If the search returns no results, or no record exists at that rank, print the string null.
|
93
93
|
|
data/TODO.md
ADDED
data/lib/searchyj/cli.rb
CHANGED
@@ -4,22 +4,6 @@ require 'json'
|
|
4
4
|
|
5
5
|
module SearchYJ
|
6
6
|
class CLI < Thor
|
7
|
-
desc 'at_rank',
|
8
|
-
"Get a record in the search result\n" \
|
9
|
-
'at a particular rank order in the search ranking.'
|
10
|
-
option :rank,
|
11
|
-
type: :numeric,
|
12
|
-
required: true,
|
13
|
-
aliases: '-r',
|
14
|
-
desc: 'The rank order in the search ranking'
|
15
|
-
def at_rank(term)
|
16
|
-
rank = options[:rank]
|
17
|
-
|
18
|
-
puts JSON.dump(
|
19
|
-
result = SearchYJ.at_rank(term, rank)
|
20
|
-
)
|
21
|
-
end
|
22
|
-
|
23
7
|
desc 'detect',
|
24
8
|
"Detect a first record that\n" \
|
25
9
|
'meet the conditions of a regexp and a key.'
|
@@ -65,5 +49,21 @@ module SearchYJ
|
|
65
49
|
SearchYJ.list(term, size, from)
|
66
50
|
)
|
67
51
|
end
|
52
|
+
|
53
|
+
desc 'rank',
|
54
|
+
"Get a record in the search result\n" \
|
55
|
+
'at a particular rank order in the search ranking.'
|
56
|
+
option :rank,
|
57
|
+
type: :numeric,
|
58
|
+
required: true,
|
59
|
+
aliases: '-r',
|
60
|
+
desc: 'The rank order in the search ranking'
|
61
|
+
def rank(term)
|
62
|
+
rank = options[:rank]
|
63
|
+
|
64
|
+
puts JSON.dump(
|
65
|
+
SearchYJ.rank(term, rank)
|
66
|
+
)
|
67
|
+
end
|
68
68
|
end
|
69
69
|
end
|
data/lib/searchyj/main.rb
CHANGED
@@ -2,19 +2,6 @@ require 'searchyj/searcher'
|
|
2
2
|
|
3
3
|
module SearchYJ
|
4
4
|
class Main
|
5
|
-
# Get a record in the search result
|
6
|
-
# at a particular rank order in the search ranking.
|
7
|
-
# @param term [String] Search term
|
8
|
-
# @param rank [Integer] The rank order in the search ranking
|
9
|
-
#
|
10
|
-
# @return [Hash]
|
11
|
-
# A result record if matched the arguments
|
12
|
-
# Else nil
|
13
|
-
def at_rank(term, rank)
|
14
|
-
result = list(term, 1, rank)
|
15
|
-
(result.size > 0) ? result[0] : nil
|
16
|
-
end
|
17
|
-
|
18
5
|
# Detect a first record that
|
19
6
|
# meets the conditions of a regexp and a key.
|
20
7
|
# @param term [String] Search term
|
@@ -29,6 +16,7 @@ module SearchYJ
|
|
29
16
|
|
30
17
|
searcher = Searcher.new
|
31
18
|
searcher.uri.search_term = term
|
19
|
+
searcher.pager.size = 100
|
32
20
|
|
33
21
|
searcher.run do |record|
|
34
22
|
if regexp.match(record[key])
|
@@ -51,7 +39,7 @@ module SearchYJ
|
|
51
39
|
searcher = Searcher.new
|
52
40
|
searcher.uri.search_term = term
|
53
41
|
searcher.uri.index = start_index
|
54
|
-
searcher.
|
42
|
+
searcher.pager.size = size
|
55
43
|
list = []
|
56
44
|
|
57
45
|
searcher.run do |record|
|
@@ -61,5 +49,18 @@ module SearchYJ
|
|
61
49
|
|
62
50
|
list
|
63
51
|
end
|
52
|
+
|
53
|
+
# Get a record in the search result
|
54
|
+
# at a particular rank order in the search ranking.
|
55
|
+
# @param term [String] Search term
|
56
|
+
# @param rank [Integer] The rank order in the search ranking
|
57
|
+
#
|
58
|
+
# @return [Hash]
|
59
|
+
# A result record if matched the arguments
|
60
|
+
# Else nil
|
61
|
+
def rank(term, rank)
|
62
|
+
result = list(term, 1, rank)
|
63
|
+
(result.size > 0) ? result[0] : nil
|
64
|
+
end
|
64
65
|
end
|
65
66
|
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
|
2
|
+
module SearchYJ
|
3
|
+
#
|
4
|
+
# Manage the cookie that adjusts the page size of the search results
|
5
|
+
#
|
6
|
+
# @author [indeep-xyz]
|
7
|
+
#
|
8
|
+
class PageSizeAdjuster
|
9
|
+
attr_reader :size
|
10
|
+
|
11
|
+
SIZE_PATTERN = [10, 15, 20, 30, 40, 100]
|
12
|
+
SIZE_DEFAULT = SIZE_PATTERN[0]
|
13
|
+
COOKIE_BASE = \
|
14
|
+
'sB="n=<<n>>&nw=-1&fp_ipod=0&fp_pl=0"; ' \
|
15
|
+
'path=/; ' \
|
16
|
+
'expire=<<expire>>'
|
17
|
+
EXPIRE_DELAY = 60 * 60 * 24
|
18
|
+
|
19
|
+
# Initialize myself.
|
20
|
+
def initialize
|
21
|
+
@size = SIZE_DEFAULT
|
22
|
+
@expire = Time.now.to_i + EXPIRE_DELAY
|
23
|
+
end
|
24
|
+
|
25
|
+
def size=(size)
|
26
|
+
@size = optimize_page_size(size)
|
27
|
+
end
|
28
|
+
|
29
|
+
# Optimize the number of the page size for searching.
|
30
|
+
# @param size [Number] The number of the page size
|
31
|
+
#
|
32
|
+
# @return [Number] The optimized number
|
33
|
+
def optimize_page_size(size)
|
34
|
+
SIZE_PATTERN.reverse_each do |n|
|
35
|
+
return n if size >= n
|
36
|
+
end
|
37
|
+
|
38
|
+
SIZE_DEFAULT
|
39
|
+
end
|
40
|
+
|
41
|
+
# Attach the cookie string to the argument
|
42
|
+
# if @size is greater than the default value.
|
43
|
+
# @param hash [Hash]
|
44
|
+
#
|
45
|
+
# @return [Hash]
|
46
|
+
def attach_cookie(hash)
|
47
|
+
if @size > SIZE_DEFAULT
|
48
|
+
hash['Cookie'] = create_cookie
|
49
|
+
end
|
50
|
+
|
51
|
+
hash
|
52
|
+
end
|
53
|
+
|
54
|
+
private
|
55
|
+
|
56
|
+
# Create the cookie string to adjust the page size.
|
57
|
+
#
|
58
|
+
# @return [String] The cookie string
|
59
|
+
def create_cookie
|
60
|
+
COOKIE_BASE.gsub(
|
61
|
+
/<<[^>]+>>/,
|
62
|
+
'<<n>>' => @size.to_s,
|
63
|
+
'<<expire>>' => @expire.to_s
|
64
|
+
)
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
data/lib/searchyj/searcher.rb
CHANGED
@@ -2,6 +2,7 @@ require 'nokogiri'
|
|
2
2
|
require 'open-uri'
|
3
3
|
require 'searchyj/uri_manager'
|
4
4
|
require 'searchyj/record_sorter'
|
5
|
+
require 'searchyj/page_size_adjuster'
|
5
6
|
|
6
7
|
module SearchYJ
|
7
8
|
#
|
@@ -13,7 +14,13 @@ module SearchYJ
|
|
13
14
|
#
|
14
15
|
class Searcher
|
15
16
|
attr_reader :results
|
16
|
-
attr_accessor
|
17
|
+
attr_accessor \
|
18
|
+
:pager, :uri, \
|
19
|
+
:limit_loop, :user_agent, :sleep_time
|
20
|
+
|
21
|
+
ENCODING = 'UTF-8'
|
22
|
+
LIMIT_LOOP = 50
|
23
|
+
SLEEP_TIME = 1
|
17
24
|
USER_AGENT = \
|
18
25
|
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:38.0)' \
|
19
26
|
'Gecko/20100101 Firefox/38.0'
|
@@ -22,16 +29,17 @@ module SearchYJ
|
|
22
29
|
|
23
30
|
# Initialize myself.
|
24
31
|
def initialize
|
32
|
+
@pager = PageSizeAdjuster.new
|
25
33
|
@uri = UriManager.new
|
26
|
-
@
|
34
|
+
@encoding = ENCODING
|
35
|
+
@limit_loop = LIMIT_LOOP
|
36
|
+
@sleep_time = SLEEP_TIME
|
27
37
|
@user_agent = USER_AGENT
|
28
|
-
@sleep_time = 1
|
29
|
-
@page_size = 10
|
30
38
|
end
|
31
39
|
|
32
40
|
def run(&block)
|
33
41
|
loop_count = 0
|
34
|
-
sorter = RecordSorter.new(@uri.index, @
|
42
|
+
sorter = RecordSorter.new(@uri.index, @pager.size)
|
35
43
|
|
36
44
|
while loop_count < @limit_loop
|
37
45
|
fetch_html
|
@@ -79,6 +87,8 @@ module SearchYJ
|
|
79
87
|
'User-Agent' => @user_agent
|
80
88
|
}
|
81
89
|
|
90
|
+
params = @pager.attach_cookie(params)
|
91
|
+
|
82
92
|
open(uri, params) do |f|
|
83
93
|
fail OpenUriError unless f.status[0] == '200'
|
84
94
|
f.read
|
@@ -89,7 +99,7 @@ module SearchYJ
|
|
89
99
|
# and set the parsed HTML data to my own instance.
|
90
100
|
def fetch_html
|
91
101
|
raw_html = download_raw_html
|
92
|
-
@html = Nokogiri::HTML.parse(raw_html, nil,
|
102
|
+
@html = Nokogiri::HTML.parse(raw_html, nil, @encoding)
|
93
103
|
end
|
94
104
|
|
95
105
|
# Check whether or not the next page exists.
|
data/lib/searchyj/version.rb
CHANGED
data/lib/searchyj.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: searchyj
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- indeep-xyz
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-06-
|
11
|
+
date: 2015-06-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -96,12 +96,14 @@ files:
|
|
96
96
|
- README.ja.md
|
97
97
|
- README.md
|
98
98
|
- Rakefile
|
99
|
+
- TODO.md
|
99
100
|
- bin/console
|
100
101
|
- bin/searchyj
|
101
102
|
- bin/setup
|
102
103
|
- lib/searchyj.rb
|
103
104
|
- lib/searchyj/cli.rb
|
104
105
|
- lib/searchyj/main.rb
|
106
|
+
- lib/searchyj/page_size_adjuster.rb
|
105
107
|
- lib/searchyj/record_sorter.rb
|
106
108
|
- lib/searchyj/searcher.rb
|
107
109
|
- lib/searchyj/unique_logger.rb
|