Hearch 2018.8.28 → 2018.8.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/Hearch.rb +116 -23
- metadata +22 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 027d6569ecfe8136979a5e251cb8b47613613515
|
|
4
|
+
data.tar.gz: f5c63fa7936c14655806a227bac9810f973b5eda
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 7bccddf365d81d95fb52cef66f2ef98b50a276b1a35dd144423317d64167b441fa72f33611d032f86d982910b8a4ae3752fc74d04bf8d877ad87a73deeb510b1
|
|
7
|
+
data.tar.gz: 7faf5c16c64a253c8cb8fb8656fbb56933ede895b5a6f8928109591ee40883f90fa839cc7795ba2ebc1609212ad3f294f961bce3cad82ece552ca5ebe6040045
|
data/lib/Hearch.rb
CHANGED
|
@@ -1,40 +1,133 @@
|
|
|
1
1
|
#!/usr/bin/env ruby
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
3
|
+
$LOAD_PATH.unshift File.expand_path('../Hearch', __FILE__)
|
|
4
|
+
|
|
5
|
+
# require 'Hearch/HearchIndexEntry_pb'
|
|
6
|
+
require 'Hearch/HearchIndex_pb'
|
|
7
|
+
require 'rmmseg'
|
|
8
|
+
|
|
9
|
+
include RMMSeg
|
|
5
10
|
|
|
6
11
|
# A small keyword index: every index entry pairs one keyword with the ids of
# the articles that contain it. The index is a HearchIndex message
# (see Hearch/HearchIndex_pb) and is stored on disk in its encoded form.
#
# The index starts out as nil. Call initializeIndex or loadIndex before
# search, addArticle or saveIndex.
class Hearch
  def initialize
    @index = nil # Index object; nil until initializeIndex or loadIndex is called.
  end

  # Initialize an empty index.
  #
  # Example:
  #   >> hearch=Hearch.new
  #   >> hearch.initializeIndex
  def initializeIndex
    @index = Com::Stupidbeauty::Hearch::HearchIndex.new
  end

  # Split a string into a list of keywords by word segmentation.
  #
  # Delegates to +segment+, which is not defined in this class; presumably it
  # is supplied by the top-level `include RMMSeg` of this file.
  #
  # Arguments:
  #   keywordsString: (String)
  def getKeywordList(keywordsString)
    segment(keywordsString)
  end

  # Load index.
  #
  # Example:
  #   >> hearch=Hearch.new
  #   >> hearch.loadIndex('indexFile.pb')
  # Arguments:
  #   indexFileName: (String)
  def loadIndex(indexFileName)
    # The file holds an encoded message, i.e. raw bytes. Read it in binary
    # mode so no newline translation or encoding conversion touches the data.
    fileContent = File.binread(indexFileName)

    @index = Com::Stupidbeauty::Hearch::HearchIndex.decode(fileContent)
  end

  # Save index.
  #
  # Example:
  #   >> hearch=Hearch.new
  #   >> hearch.initializeIndex
  #   >> hearch.addArticle('something to search', 1)
  #   >> hearch.saveIndex('indexFile.pb')
  # Arguments:
  #   indexFileName: (String)
  def saveIndex(indexFileName)
    encodedIndex = Com::Stupidbeauty::Hearch::HearchIndex.encode(@index)

    # binwrite truncates the target, writes the raw bytes and always closes
    # the file handle, even when the write itself raises.
    File.binwrite(indexFileName, encodedIndex)

    nil # Keep the historical return value (the result of File#close).
  end

  # Search for articles in the index.
  #
  # Returns an Array with one element per index entry whose keyword equals
  # one of the keywords extracted from keywordsString, in index order. Each
  # element is that entry's whole articleId collection, so the result is a
  # list of id lists rather than a flat list of ids.
  #
  # Example:
  #   >> hearch=Hearch.new
  #   >> hearch.loadIndex('indexFile.pb')
  #   >> keywordsString='something to search'
  #   >> resultList=hearch.search(keywordsString)
  # Arguments:
  #   keywordsString: (String)
  def search(keywordsString)
    keywordList = getKeywordList(keywordsString)

    # An entry is reported once, however many query keywords match it.
    matchedEntries = @index.entry.select do |currentEntry|
      keywordList.any? { |currentKeyword| currentEntry.keyword == currentKeyword }
    end

    matchedEntries.map { |currentEntry| currentEntry.articleId }
  end

  # Add an article into the index.
  #
  # Every keyword extracted from the article content is looked up in the
  # index. If an entry for the keyword exists, articleId is appended to it;
  # otherwise a new entry holding just articleId is appended to the index.
  #
  # Example:
  #   >> hearch=Hearch.new
  #   >> hearch.loadIndex('indexFile.pb')
  #   >> articleContentString='something to search'
  #   >> hearch.addArticle(articleContentString, 1)
  # Arguments:
  #   articleContentString: (String)
  #   articleId: (int)
  def addArticle(articleContentString, articleId)
    keywordList = getKeywordList(articleContentString)

    keywordList.each do |currentKeyword|
      existingEntry = @index.entry.find { |currentEntry| currentEntry.keyword == currentKeyword }

      if existingEntry
        existingEntry.articleId << articleId
      else
        newEntry = Com::Stupidbeauty::Hearch::HearchIndexEntry.new
        newEntry.keyword = currentKeyword
        newEntry.articleId << articleId

        @index.entry << newEntry
      end
    end
  end
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: Hearch
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 2018.8.
|
|
4
|
+
version: 2018.8.31
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Hxcan Cai
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2018-08-
|
|
11
|
+
date: 2018-08-31 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: google-protobuf
|
|
@@ -44,6 +44,26 @@ dependencies:
|
|
|
44
44
|
- - "~>"
|
|
45
45
|
- !ruby/object:Gem::Version
|
|
46
46
|
version: '1.0'
|
|
47
|
+
- !ruby/object:Gem::Dependency
|
|
48
|
+
name: plexus-rmmseg
|
|
49
|
+
requirement: !ruby/object:Gem::Requirement
|
|
50
|
+
requirements:
|
|
51
|
+
- - "~>"
|
|
52
|
+
- !ruby/object:Gem::Version
|
|
53
|
+
version: '0.1'
|
|
54
|
+
- - ">="
|
|
55
|
+
- !ruby/object:Gem::Version
|
|
56
|
+
version: 0.1.6
|
|
57
|
+
type: :runtime
|
|
58
|
+
prerelease: false
|
|
59
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
60
|
+
requirements:
|
|
61
|
+
- - "~>"
|
|
62
|
+
- !ruby/object:Gem::Version
|
|
63
|
+
version: '0.1'
|
|
64
|
+
- - ">="
|
|
65
|
+
- !ruby/object:Gem::Version
|
|
66
|
+
version: 0.1.6
|
|
47
67
|
description: Hearch ruby. Index and search.
|
|
48
68
|
email: caihuosheng@gmail.com
|
|
49
69
|
executables: []
|