Hearch 2018.8.28 → 2018.8.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/Hearch.rb +116 -23
  3. metadata +22 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4929a505b5a640e52533103964b1861bb2fc7438
4
- data.tar.gz: e3975cc85818341fbd2bd3b4860ec7b35fcb4669
3
+ metadata.gz: 027d6569ecfe8136979a5e251cb8b47613613515
4
+ data.tar.gz: f5c63fa7936c14655806a227bac9810f973b5eda
5
5
  SHA512:
6
- metadata.gz: a913769d94d92e545759fbd4b2580b28b9b23d16be2c696ac20d220b87a2cea95077eb471322346d7ae22cbcab4cfe2027f7ea5706bdee776c6d985b9d4f054b
7
- data.tar.gz: 22f391288f8801d7e189bf6707db940eb1e28d186cfaf93a585c3be7681486ff4c019de84e36a96e2d8c83d712494ce81018807cc792109bf1495ff11f48e4ae
6
+ metadata.gz: 7bccddf365d81d95fb52cef66f2ef98b50a276b1a35dd144423317d64167b441fa72f33611d032f86d982910b8a4ae3752fc74d04bf8d877ad87a73deeb510b1
7
+ data.tar.gz: 7faf5c16c64a253c8cb8fb8656fbb56933ede895b5a6f8928109591ee40883f90fa839cc7795ba2ebc1609212ad3f294f961bce3cad82ece552ca5ebe6040045
data/lib/Hearch.rb CHANGED
@@ -1,40 +1,133 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- require 'Hearch/ExistMessage_pb'
4
- require 'Hearch/MulticastHandler'
3
+ $LOAD_PATH.unshift File.expand_path('../Hearch', __FILE__)
4
+
5
+ # require 'Hearch/HearchIndexEntry_pb'
6
+ require 'Hearch/HearchIndex_pb'
7
+ require 'rmmseg'
8
+
9
+ include RMMSeg
5
10
 
6
11
  class Hearch
7
12
  def initialize
8
- host = '239.173.40.5' #组播组主机地址。
9
- port = 11500 #组播组端口。
13
+ @index = nil #索引对象。
14
+ end
15
+
16
+ # Initialize an empty index.
17
+ #
18
+ # Example:
19
+ # >> hearch=Hearch.new
20
+ # >> hearch.initializeIndex
21
+ def initializeIndex
22
+ @index=Com::Stupidbeauty::Hearch::HearchIndex.new #索引库对象。
23
+ end
24
+
25
+ #分词获取关键字列表。
26
+ def getKeywordList(keywordsString)
27
+ resultList=[] #结果列表。
28
+
29
+ resultList=segment(keywordsString)
30
+
31
+ # resultList << keywordsString #最简单实现,直接将整个字符串加入。
32
+
33
+ resultList #返回结果。
34
+ end #def getKeywordList(keywordsString)
10
35
 
11
- @networkThread = EM::open_datagram_socket(host, port, MulticastHandler)
36
+ # Load index.
37
+ #
38
+ # Example:
39
+ # >> hearch=Hearch.new
40
+ # >> hearch.loadIndex('indexFile.pb')
41
+ # Arguments:
42
+ # indexFileName: (String)
43
+ def loadIndex(indexFileName)
44
+ fileContent=File.read(indexFileName) #读取文件内容。
45
+
46
+ #解析protobuf:
47
+ @index=Com::Stupidbeauty::Hearch::HearchIndex.decode(fileContent) #protobuf解码。
12
48
  end
13
49
 
14
- # Publish a service to the LAN.
50
+ # Save index.
15
51
  #
16
52
  # Example:
17
- # >> existRuby=ExistRuby.new
18
- # >> existRuby.publishService("fp_serverPython", 10086, exist::ServicePublishMessage::ServiceProtocolType::GRPC)
53
+ # >> hearch=Hearch.new
54
+ # >> hearch.saveIndex('indexFile.pb')
55
+ # Arguments:
56
+ # indexFileName: (String)
57
+ def saveIndex(indexFileName)
58
+ manufacturerListEncoded="" #初始化要存储的缓冲区。
59
+ manufacturerListEncoded=Com::Stupidbeauty::Hearch::HearchIndex.encode(@index) #重新编码。
60
+
61
+ manufacturerListFile=File.open(indexFileName, 'w') #打开本地记录文件。
62
+ manufacturerListFile.write(manufacturerListEncoded) #写入到文件中。
63
+ manufacturerListFile.close #关闭文件。
64
+ end
65
+
66
+ # Search for articles in the index.
19
67
  #
68
+ # Example:
69
+ # >> hearch=Hearch.new
70
+ # >> hearch.loadIndex('indexFile.pb')
71
+ # >> keywordString='something to search'
72
+ # >> resultList=hearch.search(keywordsString)
20
73
  # Arguments:
21
- # lanServiceName: (String)
22
- # lanServicePort: (int)
23
- # lanServiceProtocolType: (enum)
24
- def publishService(lanServiceName, lanServicePort, lanServiceProtocolType)
25
- existMessage=Com::Stupidbeauty::Exist::ExistMessage.new #消息体。
26
-
27
- existMessage.messageType= Com::Stupidbeauty::Exist::ExistMessage::MessageType::SERVICEPUBLISH #设置消息类型。
28
-
29
- existMessage.servicePublishMessage=Com::Stupidbeauty::Exist::ServicePublishMessage.new #创建服务发布消息体。
74
+ # keywordsString: (String)
75
+ def search(keywordsString)
76
+ resultList=[] #结果列表。
77
+ keywordList=getKeywordList(keywordsString) #分词获取关键字列表。
78
+
79
+ @index.entry.each do |currentEntry| #一个个条目地比较。
80
+ keywordList.each do |currentKeyword| #一个个关键字地比较。
81
+ # puts "Current keyword: ", currentKeyword, ", current entry keyword: ", currentEntry.keyword #Debug.
82
+ if currentEntry.keyword==currentKeyword #关键字相同。
83
+ resultList << currentEntry.articleId #将文章编号加入到结果列表中。
84
+
85
+ break #跳出。
86
+ end #if currentEntry.keyword==currentKeyword #关键字相同。
87
+ end #keywordList.each do |currentKeyword| #一个个关键字地比较。
88
+ end #@index.entry.each do |currentEntry| #一个个条目地比较。
89
+
90
+ resultList #返回结果列表。
91
+ end
92
+
93
+ # Add an article into the index.
94
+ #
95
+ # Example:
96
+ # >> hearch=Hearch.new
97
+ # >> hearch.loadIndex('indexFile.pb')
98
+ # >> articleContentString='something to search'
99
+ # >> hearch.addArticle(articleContentString, 1)
100
+ # Arguments:
101
+ # articleContentString: (String)
102
+ # articleId: (int)
103
+ def addArticle(articleContentString, articleId)
104
+ keywordList=getKeywordList(articleContentString) #分词获取关键字列表。
30
105
 
31
- existMessage.servicePublishMessage.name=lanServiceName #设置服务名字。
32
- existMessage.servicePublishMessage.port=lanServicePort #设置端口号。
33
- existMessage.servicePublishMessage.protocolType=lanServiceProtocolType #设置协议类型。
106
+ #遍历关键字列表:
107
+ keywordList.each do |currentKeyword| #一个个关键字地添加。
108
+
109
+ existEntry=false #是否命中了已有条目。
34
110
 
35
- packageString=Com::Stupidbeauty::Exist::ExistMessage.encode(existMessage) #序列化为字符串。
111
+ @index.entry.each do |currentEntry| #一个个条目地比较。
112
+ if currentEntry.keyword==currentKeyword #关键字相同。
113
+ currentEntry.articleId << articleId #将文章编号加入到文章编号列表中。
114
+
115
+ existEntry=true #是命中了已有条目。
116
+
117
+ break #跳出。
118
+ end #if currentEntry.keyword==currentKeyword #关键字相同。
36
119
 
37
- #发送:
38
- @networkThread.castData(packageString) #发送数据。
120
+ end #@index.entry.each do |currentEntry| #一个个条目地比较。
121
+
122
+ if (existEntry) #命中了已有条目。
123
+ else #未命中已有条目。
124
+ currentEntry=Com::Stupidbeauty::Hearch::HearchIndexEntry.new #索引库条目对象。
125
+
126
+ currentEntry.keyword=currentKeyword #设置关键字。
127
+ currentEntry.articleId << articleId #将文章编号加入到文章编号列表中。
128
+
129
+ @index.entry << currentEntry #加入条目列表中。
130
+ end #if (existEntry) #命中了已有条目。
131
+ end #keywordList.each do |currentKeyword| #一个个关键字地比较。
39
132
  end
40
133
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: Hearch
3
3
  version: !ruby/object:Gem::Version
4
- version: 2018.8.28
4
+ version: 2018.8.31
5
5
  platform: ruby
6
6
  authors:
7
7
  - Hxcan Cai
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-08-28 00:00:00.000000000 Z
11
+ date: 2018-08-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: google-protobuf
@@ -44,6 +44,26 @@ dependencies:
44
44
  - - "~>"
45
45
  - !ruby/object:Gem::Version
46
46
  version: '1.0'
47
+ - !ruby/object:Gem::Dependency
48
+ name: plexus-rmmseg
49
+ requirement: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - "~>"
52
+ - !ruby/object:Gem::Version
53
+ version: '0.1'
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ version: 0.1.6
57
+ type: :runtime
58
+ prerelease: false
59
+ version_requirements: !ruby/object:Gem::Requirement
60
+ requirements:
61
+ - - "~>"
62
+ - !ruby/object:Gem::Version
63
+ version: '0.1'
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ version: 0.1.6
47
67
  description: Hearch ruby. Index and search.
48
68
  email: caihuosheng@gmail.com
49
69
  executables: []