nlpir 0.0.4-x86-mingw32 → 1.0.0-x86-mingw32
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +146 -8
- data/bin/NLPIR.dll +0 -0
- data/lib/Data/Configure.xml +16 -15
- data/lib/Data/DocExtractor.user +2 -0
- data/lib/Data/English/English.pdat +0 -0
- data/lib/Data/English/English.pos +0 -0
- data/lib/Data/English/English.ung +0 -0
- data/lib/Data/English/English.wordlist +0 -0
- data/lib/Data/English/Irrel2regular.map +0 -0
- data/lib/Data/English/ne.pdat +0 -0
- data/lib/Data/English/ne.pos +0 -0
- data/lib/Data/English/ne.wordlist +0 -0
- data/lib/Data/FTU8.pdat +0 -0
- data/lib/Data/FTU8.wordlist +0 -0
- data/lib/Data/FTU82GBK.map +0 -0
- data/lib/Data/FieldDict.pdat +0 -0
- data/lib/Data/FieldDict.pos +0 -0
- data/lib/Data/GBK2FTU8.map +0 -0
- data/lib/Data/ICTPOS.map +4 -0
- data/lib/Data/NLPIR.user +0 -0
- data/lib/Data/NewWord.lst +18 -15
- data/lib/Data/PKU.map +4 -0
- data/lib/Data/PKU_First.map +4 -0
- data/lib/Data/UserDict.pdat +0 -0
- data/lib/Data/location.map +0 -0
- data/lib/Data/location.pdat +0 -0
- data/lib/Data/location.wordlist +0 -0
- data/lib/Data/sentiment.pdat +0 -0
- data/lib/Data/sentiment.ung +0 -0
- data/lib/nlpir.rb +187 -163
- data/lib/nlpir/version.rb +1 -1
- data/test/Data/Configure.xml +16 -15
- data/test/Data/DocExtractor.user +2 -0
- data/test/Data/English/English.pdat +0 -0
- data/test/Data/English/English.pos +0 -0
- data/test/Data/English/English.ung +0 -0
- data/test/Data/English/English.wordlist +0 -0
- data/test/Data/English/Irrel2regular.map +0 -0
- data/test/Data/English/ne.pdat +0 -0
- data/test/Data/English/ne.pos +0 -0
- data/test/Data/English/ne.wordlist +0 -0
- data/test/Data/FTU8.pdat +0 -0
- data/test/Data/FTU8.wordlist +0 -0
- data/test/Data/FTU82GBK.map +0 -0
- data/test/Data/GBK2FTU8.map +0 -0
- data/test/Data/ICTPOS.map +4 -0
- data/test/Data/NLPIR.user +0 -0
- data/test/Data/NewWord.lst +18 -63
- data/test/Data/PKU.map +4 -0
- data/test/Data/PKU_First.map +4 -0
- data/test/Data/UserDict.pdat +0 -0
- data/test/Data/location.map +0 -0
- data/test/Data/location.pdat +0 -0
- data/test/Data/location.wordlist +0 -0
- data/test/Data/sentiment.pdat +0 -0
- data/test/Data/sentiment.ung +0 -0
- data/test/findnewword.txt +103 -0
- data/test/test_nlpir.rb +137 -140
- data/test/test_result.txt +52 -35
- data/test/userdict.txt +5 -5
- metadata +59 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 01b964073b5822742a58c2b2c1e438ee599bf466
|
4
|
+
data.tar.gz: a0293eb0eed9928e6ca2b7d4abbbd0fa3572e46a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d76198cc3d39291a3d14c1b79b60a975cefd37a8ffdf50e9afddd2ae0439f1710f990bc79eb9ad1d4e7a9498befd0921abf264c868715c4394db5e65bfa5c8c0
|
7
|
+
data.tar.gz: fd64bb7fd5f1d5baa4686edb1854631f062bc71dc64bfb4305a514c96e35c55c0c60ebce1184d3c3563d3b01f841e6cf8ff3b41bdbc173e2401460d87ab795bb
|
data/README.md
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
# Nlpir_win
|
2
2
|
|
3
|
-
A rubygem wrapper of chinese segment tools
|
3
|
+
A RubyGem wrapper for the Chinese word segmentation tool ICTCLAS2014
|
4
4
|
|
5
|
-
Nlpir version 0.0.4 , gem nlpir-0.0.4-x86-mingw32 support '
|
5
|
+
Nlpir version 0.0.4, gem nlpir-0.0.4-x86-mingw32 supports '~ ruby2.0.0' on win7. For *nix OSes, use [nlpir](https://github.com/JoeWoo/nlpir)
|
6
6
|
|
7
7
|
## Installation
|
8
8
|
|
@@ -45,8 +45,148 @@ some DEFINE you may use :
|
|
45
45
|
```
|
46
46
|
|
47
47
|
after you gem install it:
|
48
|
+
##ruby-style func
|
49
|
+
```ruby
|
50
|
+
require 'nlpir'
|
51
|
+
include Nlpir
|
48
52
|
|
49
|
-
|
53
|
+
s = "坚定不移沿着中国特色社会主义道路前进 为全面建成小康社会而奋斗"
|
54
|
+
#first of all : Call the NLPIR API nlpir_init
|
55
|
+
|
56
|
+
nlpir_init(File.expand_path("../", __FILE__),UTF8_CODE)
|
57
|
+
|
58
|
+
#example1: Process a paragraph, and return the result text with POS or not
|
59
|
+
puts text_proc(s, NLPIR_TRUE)
|
60
|
+
puts text_proc(s, NLPIR_FALSE)
|
61
|
+
|
62
|
+
#example2: Process a paragraph, and return an array whose elements are POS-tagged words.
|
63
|
+
#tips: text_procA() returns the array; its memory is malloced by NLPIR and will be freed by nlpir_exit() (memory in server)
|
64
|
+
|
65
|
+
words_list = text_procA(s)
|
66
|
+
i=1
|
67
|
+
words_list.each do |a|
|
68
|
+
sWhichDic=""
|
69
|
+
case a.word_type
|
70
|
+
when 0
|
71
|
+
sWhichDic = "核心词典"
|
72
|
+
when 1
|
73
|
+
sWhichDic = "用户词典"
|
74
|
+
when 2
|
75
|
+
sWhichDic = "专业词典"
|
76
|
+
end
|
77
|
+
puts "No.#{i}:start:#{a.start}, length:#{a.length}, POS_ID:#{a.sPOS},word_ID:#{a.word_ID},word_type:#{a.word_type} , UserDefine:#{sWhichDic}, Word:#{s.byteslice(a.start,a.length)}, Weight:#{a.weight}\n"
|
78
|
+
i += 1
|
79
|
+
end
|
80
|
+
|
81
|
+
#example3: Process a paragraph, and return an array whose elements are POS-tagged words.
|
82
|
+
#tips: text_procAW() returns the array; its memory is malloced by ruby::fiddle and is collected by the GC (memory in agent)
|
83
|
+
|
84
|
+
words_list = text_procAW(s)
|
85
|
+
i=1
|
86
|
+
words_list.each do |a|
|
87
|
+
sWhichDic=""
|
88
|
+
case a.word_type
|
89
|
+
when 0
|
90
|
+
sWhichDic = "核心词典"
|
91
|
+
when 1
|
92
|
+
sWhichDic = "用户词典"
|
93
|
+
when 2
|
94
|
+
sWhichDic = "专业词典"
|
95
|
+
end
|
96
|
+
puts "No.#{i}:start:#{a.start}, length:#{a.length}, POS_ID:#{a.sPOS},word_ID:#{a.word_ID},word_type:#{a.word_type} , UserDefine:#{sWhichDic}, Word:#{s.byteslice(a.start,a.length)}, Weight:#{a.weight}\n"
|
97
|
+
i += 1
|
98
|
+
end
|
99
|
+
|
100
|
+
#example4: Process a text file, and write the result text to a file
|
101
|
+
puts file_proc("./test.txt", "./test_result.txt", NULL)
|
102
|
+
|
103
|
+
|
104
|
+
#example5: Get ProcessAWordCount, it returns the count of the words
|
105
|
+
puts count = text_wordcount(s)
|
106
|
+
|
107
|
+
|
108
|
+
|
109
|
+
#example6: Add/Delete a word to/from the user dictionary (the user dictionary is stored at ./Data/UserDict.pdat)
|
110
|
+
puts text_proc("我们都是爱思客")
|
111
|
+
#add a user word
|
112
|
+
add_userword("都是爱思客 n")
|
113
|
+
add_userword("思客 n")
|
114
|
+
add_userword("你是 n")
|
115
|
+
add_userword("都是客 n")
|
116
|
+
add_userword("都是爱 n")
|
117
|
+
puts text_proc("我们都是爱思客")
|
118
|
+
#save the user word to disk
|
119
|
+
save_userdict()
|
120
|
+
puts text_proc("我们都是爱思客")
|
121
|
+
#delete a user word
|
122
|
+
del_userword("都是爱思客")
|
123
|
+
save_userdict()
|
124
|
+
puts text_proc("我们都是爱思客")
|
125
|
+
|
126
|
+
|
127
|
+
#example7: Import a user-defined dictionary from a text file, and print the NLPIR result
|
128
|
+
puts text_proc("1989年春夏之交的政治风波1989年政治风波24小时降雪量24小时降雨量863计划ABC防护训练APEC会议BB机BP机C2系统C3I系统C3系统C4ISR系统C4I系统CCITT建议")
|
129
|
+
puts import_userdict("./userdict.txt")
|
130
|
+
#see the example file ./userdict.txt for the userdict's format requirements
|
131
|
+
save_userdict()
|
132
|
+
puts text_proc("1989年春夏之交的政治风波1989年政治风波24小时降雪量24小时降雨量863计划ABC防护训练APEC会议BB机BP机C2系统C3I系统C3系统C4ISR系统C4I系统CCITT建议")
|
133
|
+
|
134
|
+
|
135
|
+
#example8: Get keywords of text
|
136
|
+
#2nd parameter is the MaxNumber of keywords
|
137
|
+
#3rd parameter is a switch to show the WeightOut or not
|
138
|
+
puts text_keywords(s, 50,NLPIR_TRUE)
|
139
|
+
|
140
|
+
|
141
|
+
#example9: Get keywords from file
|
142
|
+
puts file_keywords("./test.txt",50, NLPIR_TRUE)
|
143
|
+
|
144
|
+
|
145
|
+
#example10: Find new words from text
|
146
|
+
puts text_newwords(s, 50, NLPIR_TRUE)
|
147
|
+
|
148
|
+
|
149
|
+
#example11: Find new words from file
|
150
|
+
puts file_newwords("./test.txt")
|
151
|
+
|
152
|
+
|
153
|
+
#example12: Extract a finger print from the paragraph
|
154
|
+
puts text_fingerprint(s)
|
155
|
+
|
156
|
+
|
157
|
+
#example13: select which pos map will use
|
158
|
+
#ICT_POS_MAP_FIRST #//计算所一级标注集
|
159
|
+
#ICT_POS_MAP_SECOND #//计算所二级标注集
|
160
|
+
#PKU_POS_MAP_SECOND #//北大二级标注集
|
161
|
+
#PKU_POS_MAP_FIRST #//北大一级标注集
|
162
|
+
setPOSmap(ICT_POS_MAP_FIRST)
|
163
|
+
puts text_proc(s)
|
164
|
+
setPOSmap(PKU_POS_MAP_FIRST)
|
165
|
+
puts text_proc(s)
|
166
|
+
|
167
|
+
|
168
|
+
|
169
|
+
# 新词发现批量处理功能
|
170
|
+
#以下函数为2013版本专门针对新词发现的过程,一般建议脱机实现,不宜在线处理
|
171
|
+
# 新词识别完成后,再自动导入到分词系统中,即可完成
|
172
|
+
|
173
|
+
NWI_start() #启动新词发现功能
|
174
|
+
f=File.new("test.txt", "r")
|
175
|
+
text=f.read
|
176
|
+
NWI_addfile(text)#添加新词训练的文件,可反复添加
|
177
|
+
NWI_complete()#添加文件或者训练内容结束
|
178
|
+
f.close()
|
179
|
+
puts NWI_result()#输出新词识别结果
|
180
|
+
#puts file_proc("a.txt","b.txt")
|
181
|
+
NWI_result2userdict()#新词识别结果导入到用户词典
|
182
|
+
|
183
|
+
|
184
|
+
#at the end, call NLPIR_Exit() to free system resources
|
185
|
+
nlpir_exit()
|
186
|
+
|
187
|
+
|
188
|
+
```
|
189
|
+
##c-style func
|
50
190
|
|
51
191
|
```ruby
|
52
192
|
|
@@ -171,12 +311,9 @@ also can see some examples from test cases [here](https://github.com/JoeWoo/nlpi
|
|
171
311
|
# 新词识别完成后,再自动导入到分词系统中,即可完成
|
172
312
|
|
173
313
|
NLPIR_NWI_Start() #启动新词发现功能
|
174
|
-
|
175
|
-
text=f.read
|
176
|
-
NLPIR_NWI_AddFile(text)#添加新词训练的文件,可反复添加
|
314
|
+
NLPIR_NWI_AddFile("./text.txt")#添加新词训练的文件,可反复添加
|
177
315
|
NLPIR_NWI_Complete()#添加文件或者训练内容结束
|
178
|
-
|
179
|
-
puts NLPIR_NWI_GetResult()#输出新词识别结果
|
316
|
+
puts NLPIR_NWI_GetResult().to_s#输出新词识别结果 可传入一个参数NLPIR_TRUE或NLPIR_FALSE,用于是否输出词性
|
180
317
|
#puts NLPIR_FileProcess("a.txt","b.txt")
|
181
318
|
NLPIR_NWI_Result2UserDict()#新词识别结果导入到用户词典
|
182
319
|
|
@@ -186,6 +323,7 @@ also can see some examples from test cases [here](https://github.com/JoeWoo/nlpi
|
|
186
323
|
|
187
324
|
```
|
188
325
|
|
326
|
+
|
189
327
|
## Contributing
|
190
328
|
|
191
329
|
1. Fork it
|
data/bin/NLPIR.dll
CHANGED
Binary file
|
data/lib/Data/Configure.xml
CHANGED
@@ -1,15 +1,16 @@
|
|
1
|
-
<?xmlversion="1.0"encoding="GB2312"?>
|
2
|
-
<NLPIR>
|
3
|
-
<TagSet>ICTPOS.map</TagSet>//���Ա�ע��ӳ���ļ�
|
4
|
-
<UserDict>on</UserDict>//On��UserDictionaryapplied;Off:notapplied��
|
5
|
-
<UserDictPrior>
|
6
|
-
<FieldDict>
|
7
|
-
<GranularityContorl>off</GranularityContorl>
|
8
|
-
<Log>
|
9
|
-
<version>2013</version>//ϵͳ�汾��
|
10
|
-
<Modify>2012-11-14</Modify>//ϵͳ�����ʱ��
|
11
|
-
<Lexicon>2012-11-14</Lexicon>//�ʵ������ʱ��
|
12
|
-
<
|
13
|
-
<
|
14
|
-
<
|
15
|
-
</
|
1
|
+
<?xmlversion="1.0"encoding="GB2312"?>
|
2
|
+
<NLPIR>
|
3
|
+
<TagSet>ICTPOS.map</TagSet>//���Ա�ע��ӳ���ļ�
|
4
|
+
<UserDict>on</UserDict>//On��UserDictionaryapplied;Off:notapplied��
|
5
|
+
<UserDictPrior>Off</UserDictPrior>//�û��ʵ�����,Addedin2006-03-16,requiredbyNECOn���û��ʵ�ͺ��Ĵʵ���ͬʱ�еĴʻ㣬�û��ʵ����ȣ������ܲ�Ҫ���ã���������Ĵʵ��еĴʶ�����Ϊ�û��ʵ䣬��Ч���ʵ��䷴
|
6
|
+
<FieldDict>on</FieldDict>//On��FieldDictionaryapplied;Off:notapplied��
|
7
|
+
<GranularityContorl>off</GranularityContorl>
|
8
|
+
<Log>Off</Log>//On,Off�����磺Off:�ر���־���ܣ�On:����־����
|
9
|
+
<version>2013</version>//ϵͳ�汾��
|
10
|
+
<Modify>2012-11-14</Modify>//ϵͳ�����ʱ��
|
11
|
+
<Lexicon>2012-11-14</Lexicon>//�ʵ������ʱ��
|
12
|
+
<Sentiment>On</Sentiment>//On,Off�����磺Off:�ر���з������ܣ�On:����з�������
|
13
|
+
<adaptive>true</adaptive>//����Ӧ�ִʣ�Ĭ��Ϊfalse������Ӧ�ִʵ�Ч�ʻ�ϵ�
|
14
|
+
<author>�Ż�ƽ��ʿ</author>//����
|
15
|
+
<Contact>pipy_zhang@msn.com</Contact>//������ϵ��ʽ
|
16
|
+
</NLPIR>
|
@@ -0,0 +1,2 @@
|
|
1
|
+
���ť��ݸܸ��������㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע������@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע�������㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�ábQBw6u|V
|
2
|
+
-rt�����������I+g���蟋�����'"&t#''������̣ռ�܃�����רע�������㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע������@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע�������㡢��Ȼ���Դ���������˛��̷���ޜ��̍�Ӟ�����������ā���و����ҁ��䂜�˃�������ᔆ���Oww.nlpir.org.��ע������@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע�������㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��עЮ������. /=-++<���ɩв�ʿ�Ż�ƽ��ʿרע�������㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע������@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע�������㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע������@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע�������㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע������@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿ����������㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע������@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע�������㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����Fww.nlpir.org.��ע������@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע�������㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע������@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע�������㡢��Ȼ���Դ�������Ϣ��������ϓ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע������@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע�������㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע������@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע�������㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע������@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע�������㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע������@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע�������㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע������@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע�������㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע������@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע�������㡢��Ȼ���Դ������
�Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע������@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע�������㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע������@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע�������㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ����������������順�ӧ����������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע������@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע�������㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע������@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע�������㡢��Ȼ���Դ���
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
data/lib/Data/FTU8.pdat
ADDED
Binary file
|
Binary file
|
Binary file
|
data/lib/Data/FieldDict.pdat
CHANGED
Binary file
|
data/lib/Data/FieldDict.pos
CHANGED
Binary file
|
Binary file
|
data/lib/Data/ICTPOS.map
CHANGED
data/lib/Data/NLPIR.user
CHANGED
Binary file
|
data/lib/Data/NewWord.lst
CHANGED
@@ -1,25 +1,28 @@
|
|
1
|
+
|
1
2
|
��˿ ���� � ��Q �������Ļ� ���� Ⱥ���Գ� ���ݱ�
|
3
|
+
����ɽ �ΰ������� ���������� �������� ��Ʒ���� ��������
|
4
|
+
�й���ɫ������� ������������ ��ѧ��չ�� ��̬���� ��ṫƽ���� �������� �л�����ΰ���� ���幦���� �����ռ俪�� ��������� ���緢չһ�廯 С����� �������� ��������Ļ�ǿ�� ��ҵ��λ ���� ����ѡ�� �������� ���� ��������г��������� ��������� �������� ȫ������ ��Ҫս�Ի��� ȡ���ش��չ ��̬ϵͳ �Ȳ��ƽ� ���� �ӿ�ת�� ���ʽ���Σ�� ���¶����� ������������ ��������Ƚ��Ļ� ũҵ�ۺ��������� ʳƷҩƷ��ȫ ��۵�����ϵ ����ͬԸ�� �������� ˾������ �������ϱ��� ȫ���л���Ů ս�������˲�ҵ ����Ϸ�Ȩ�� ȫ������ͬ ������������ ��ǰ����ƽ�� �Ļ���ʵ�� ��֤�������� ��������ϵ ȫ��ҽ�� ����ֿ� ����������չ �����Ч �����ƶȸĸ� �������� ����δ�� ʵ�徭�� ��ѧ��ˮƽ ���㵳�� ���Ȼ� ȫ�潨��С����� �������� ��ʳ��ȫ ����ʵ�� ����ɲ� �������� �������� ���ر��� ����ڵ� ��ѧ�ش� �������� ���������� ��������� ��ȫ��в ��������� �����Ļ� ������� ����Ӱ���� �Ծ����� �Ͷ����� �ִ�����ҵ �������� ���˹�ͬ�� ��ʵ�ƽ� �Ҹ����� ���ļ�ֵ�� ���η��� ����������� ������ ƽ�Ȼ��� ������ҡ��� ����� ��Ҫս�Ի����� ת�侭�÷�չ��ʽ �ӿ�ת�侭�÷�չ��ʽ ���������ļ�ֵ��ϵ ����ȡ���ش��չ ʵ���л�����ΰ���� �����ں�ʽ��չ ��������������Ȼ� �ĸ↑�� ά����ṫƽ����
|
2
5
|
��˿ ���� � ��Q �������Ļ� ���� Ⱥ���Գ� ���ݱ�
|
6
|
+
��˿ ���� � ��˿�Ļ� ��Q �������Ļ� Ⱥ���Գ� ���ݱ� ��˧
|
7
|
+
��˿#����#�#��˿�Ļ�#��Q#�������Ļ�#Ⱥ���Գ�#���ݱ�#��˧#
|
3
8
|
|
4
9
|
|
5
10
|
|
6
11
|
|
7
12
|
|
8
13
|
|
14
|
+
������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
|
15
|
+
������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
|
9
16
|
|
10
17
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
18
|
+
��˿#����#�#��˿�Ļ�#��Q#�������Ļ�#Ⱥ���Գ�#���ݱ�#
|
19
|
+
������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
|
20
|
+
��˿#����#�#��˿�Ļ�#��Q#�������Ļ�#Ⱥ���Գ�#���ݱ�#
|
21
|
+
������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
|
22
|
+
��˿#����#�#��˿�Ļ�#��Q#�������Ļ�#Ⱥ���Գ�#���ݱ�#
|
23
|
+
������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
|
24
|
+
������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
|
25
|
+
������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
|
26
|
+
������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
|
27
|
+
��˿#����#�#��˿�Ļ�#��Q#�������Ļ�#Ⱥ���Գ�#���ݱ�#
|
28
|
+
��˿#����#�#��˿�Ļ�#��Q#�������Ļ�#Ⱥ���Գ�#���ݱ�#
|
data/lib/Data/PKU.map
CHANGED
data/lib/Data/PKU_First.map
CHANGED
data/lib/Data/UserDict.pdat
CHANGED
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
data/lib/nlpir.rb
CHANGED
@@ -3,7 +3,7 @@ require File.expand_path("../nlpir/version", __FILE__)
|
|
3
3
|
require 'fiddle'
|
4
4
|
require 'fiddle/struct'
|
5
5
|
require 'fiddle/import'
|
6
|
-
require 'fileutils'
|
6
|
+
require 'fileutils'
|
7
7
|
include Fiddle::CParser
|
8
8
|
include Fiddle::Importer
|
9
9
|
|
@@ -14,169 +14,170 @@ module Nlpir
|
|
14
14
|
ICT_POS_MAP_FIRST = 1 #计算所一级标注集
|
15
15
|
ICT_POS_MAP_SECOND = 0 #计算所二级标注集
|
16
16
|
PKU_POS_MAP_SECOND = 2 #北大二级标注集
|
17
|
-
PKU_POS_MAP_FIRST = 3
|
17
|
+
PKU_POS_MAP_FIRST = 3 #北大一级标注集
|
18
18
|
POS_SIZE = 40
|
19
19
|
|
20
20
|
Result_t = struct ['int start','int length',"char sPOS[#{POS_SIZE}]",'int iPOS',
|
21
|
-
|
22
|
-
|
21
|
+
'int word_ID','int word_type','int weight']
|
22
|
+
|
23
23
|
GBK_CODE = 0 #默认支持GBK编码
|
24
24
|
UTF8_CODE = GBK_CODE + 1 #UTF8编码
|
25
25
|
BIG5_CODE = GBK_CODE + 2 #BIG5编码
|
26
26
|
GBK_FANTI_CODE = GBK_CODE + 3 #GBK编码,里面包含繁体字
|
27
27
|
|
28
|
+
@charset = 'utf-8'
|
28
29
|
|
29
30
|
#提取链接库接口
|
30
31
|
libm = Fiddle.dlopen(File.expand_path("../../bin/NLPIR.dll", __FILE__))
|
31
32
|
|
32
33
|
NLPIR_Init_rb = Fiddle::Function.new(
|
33
|
-
|
34
|
-
|
35
|
-
|
34
|
+
libm['NLPIR_Init'],
|
35
|
+
[Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT],
|
36
|
+
Fiddle::TYPE_INT
|
36
37
|
)
|
37
|
-
NLPIR_Exit_rb = Fiddle::Function.new(
|
38
|
-
|
39
|
-
|
40
|
-
|
38
|
+
NLPIR_Exit_rb = Fiddle::Function.new(
|
39
|
+
libm['NLPIR_Exit'],
|
40
|
+
[],
|
41
|
+
Fiddle::TYPE_INT
|
41
42
|
)
|
42
|
-
NLPIR_ImportUserDict_rb = Fiddle::Function.new(
|
43
|
-
|
44
|
-
|
45
|
-
|
43
|
+
NLPIR_ImportUserDict_rb = Fiddle::Function.new(
|
44
|
+
libm['NLPIR_ImportUserDict'],
|
45
|
+
[Fiddle::TYPE_VOIDP],
|
46
|
+
Fiddle::TYPE_INT
|
46
47
|
)
|
47
|
-
NLPIR_ParagraphProcess_rb = Fiddle::Function.new(
|
48
|
-
|
49
|
-
|
50
|
-
|
48
|
+
NLPIR_ParagraphProcess_rb = Fiddle::Function.new(
|
49
|
+
libm['NLPIR_ParagraphProcess'],
|
50
|
+
[Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT],
|
51
|
+
Fiddle::TYPE_VOIDP
|
51
52
|
)
|
52
|
-
NLPIR_ParagraphProcessA_rb = Fiddle::Function.new(
|
53
|
-
|
54
|
-
|
55
|
-
|
53
|
+
NLPIR_ParagraphProcessA_rb = Fiddle::Function.new(
|
54
|
+
libm['NLPIR_ParagraphProcessA'],
|
55
|
+
[Fiddle::TYPE_VOIDP,Fiddle::TYPE_VOIDP],
|
56
|
+
Fiddle::TYPE_VOIDP
|
56
57
|
)
|
57
|
-
NLPIR_FileProcess_rb = Fiddle::Function.new(
|
58
|
-
|
59
|
-
|
60
|
-
|
58
|
+
NLPIR_FileProcess_rb = Fiddle::Function.new(
|
59
|
+
libm['NLPIR_FileProcess'],
|
60
|
+
[Fiddle::TYPE_VOIDP,Fiddle::TYPE_VOIDP, Fiddle::TYPE_INT],
|
61
|
+
Fiddle::TYPE_DOUBLE
|
61
62
|
)
|
62
|
-
NLPIR_GetParagraphProcessAWordCount_rb = Fiddle::Function.new(
|
63
|
-
|
64
|
-
|
65
|
-
|
63
|
+
NLPIR_GetParagraphProcessAWordCount_rb = Fiddle::Function.new(
|
64
|
+
libm['NLPIR_GetParagraphProcessAWordCount'],
|
65
|
+
[Fiddle::TYPE_VOIDP],
|
66
|
+
Fiddle::TYPE_INT
|
66
67
|
)
|
67
|
-
NLPIR_ParagraphProcessAW_rb = Fiddle::Function.new(
|
68
|
-
|
69
|
-
|
70
|
-
|
68
|
+
NLPIR_ParagraphProcessAW_rb = Fiddle::Function.new(
|
69
|
+
libm['NLPIR_ParagraphProcessAW'],
|
70
|
+
[Fiddle::TYPE_INT,Fiddle::TYPE_VOIDP],
|
71
|
+
Fiddle::TYPE_INT
|
71
72
|
)
|
72
|
-
NLPIR_AddUserWord_rb = Fiddle::Function.new(
|
73
|
-
|
74
|
-
|
75
|
-
|
73
|
+
NLPIR_AddUserWord_rb = Fiddle::Function.new(
|
74
|
+
libm['NLPIR_AddUserWord'],
|
75
|
+
[Fiddle::TYPE_VOIDP],
|
76
|
+
Fiddle::TYPE_INT
|
76
77
|
)
|
77
|
-
NLPIR_SaveTheUsrDic_rb = Fiddle::Function.new(
|
78
|
-
|
79
|
-
|
80
|
-
|
78
|
+
NLPIR_SaveTheUsrDic_rb = Fiddle::Function.new(
|
79
|
+
libm['NLPIR_SaveTheUsrDic'],
|
80
|
+
[],
|
81
|
+
Fiddle::TYPE_INT
|
81
82
|
)
|
82
|
-
NLPIR_DelUsrWord_rb = Fiddle::Function.new(
|
83
|
-
|
84
|
-
|
85
|
-
|
83
|
+
NLPIR_DelUsrWord_rb = Fiddle::Function.new(
|
84
|
+
libm['NLPIR_DelUsrWord'],
|
85
|
+
[Fiddle::TYPE_VOIDP],
|
86
|
+
Fiddle::TYPE_INT
|
86
87
|
)
|
87
|
-
NLPIR_GetKeyWords_rb = Fiddle::Function.new(
|
88
|
-
libm['
|
88
|
+
NLPIR_GetKeyWords_rb = Fiddle::Function.new(
|
89
|
+
libm['NLPIR_GetKeyWords'],
|
89
90
|
[Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT,Fiddle::TYPE_INT],
|
90
91
|
Fiddle::TYPE_VOIDP
|
91
92
|
)
|
92
|
-
NLPIR_GetFileKeyWords_rb = Fiddle::Function.new(
|
93
|
-
libm['
|
93
|
+
NLPIR_GetFileKeyWords_rb = Fiddle::Function.new(
|
94
|
+
libm['NLPIR_GetFileKeyWords'],
|
94
95
|
[Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT,Fiddle::TYPE_INT],
|
95
96
|
Fiddle::TYPE_VOIDP
|
96
97
|
)
|
97
|
-
NLPIR_GetNewWords_rb = Fiddle::Function.new(
|
98
|
-
libm['
|
98
|
+
NLPIR_GetNewWords_rb = Fiddle::Function.new(
|
99
|
+
libm['NLPIR_GetNewWords'],
|
99
100
|
[Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT,Fiddle::TYPE_INT],
|
100
101
|
Fiddle::TYPE_VOIDP
|
101
102
|
)
|
102
|
-
NLPIR_GetFileNewWords_rb = Fiddle::Function.new(
|
103
|
-
libm['
|
103
|
+
NLPIR_GetFileNewWords_rb = Fiddle::Function.new(
|
104
|
+
libm['NLPIR_GetFileNewWords'],
|
104
105
|
[Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT,Fiddle::TYPE_INT],
|
105
106
|
Fiddle::TYPE_VOIDP
|
106
107
|
)
|
107
|
-
NLPIR_FingerPrint_rb = Fiddle::Function.new(
|
108
|
-
libm['
|
108
|
+
NLPIR_FingerPrint_rb = Fiddle::Function.new(
|
109
|
+
libm['NLPIR_FingerPrint'],
|
109
110
|
[Fiddle::TYPE_VOIDP],
|
110
111
|
Fiddle::TYPE_LONG
|
111
112
|
)
|
112
|
-
NLPIR_SetPOSmap_rb = Fiddle::Function.new(
|
113
|
-
|
114
|
-
|
115
|
-
|
113
|
+
NLPIR_SetPOSmap_rb = Fiddle::Function.new(
|
114
|
+
libm['NLPIR_SetPOSmap'],
|
115
|
+
[Fiddle::TYPE_INT],
|
116
|
+
Fiddle::TYPE_INT
|
116
117
|
)
|
117
118
|
|
118
|
-
NLPIR_NWI_Start_rb = Fiddle::Function.new(
|
119
|
-
|
120
|
-
|
121
|
-
|
119
|
+
NLPIR_NWI_Start_rb = Fiddle::Function.new(
|
120
|
+
libm['NLPIR_NWI_Start'],
|
121
|
+
[],
|
122
|
+
Fiddle::TYPE_INT
|
122
123
|
)
|
123
|
-
NLPIR_NWI_AddFile_rb = Fiddle::Function.new(
|
124
|
-
|
125
|
-
|
126
|
-
|
124
|
+
NLPIR_NWI_AddFile_rb = Fiddle::Function.new(
|
125
|
+
libm['NLPIR_NWI_AddFile'],
|
126
|
+
[Fiddle::TYPE_VOIDP],
|
127
|
+
Fiddle::TYPE_INT
|
127
128
|
)
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
129
|
+
NLPIR_NWI_AddMem_rb = Fiddle::Function.new(
|
130
|
+
libm['NLPIR_NWI_AddMem'],
|
131
|
+
[Fiddle::TYPE_VOIDP],
|
132
|
+
Fiddle::TYPE_INT
|
132
133
|
)
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
134
|
+
NLPIR_NWI_Complete_rb = Fiddle::Function.new(
|
135
|
+
libm['NLPIR_NWI_Complete'],
|
136
|
+
[],
|
137
|
+
Fiddle::TYPE_INT
|
137
138
|
)
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
139
|
+
NLPIR_NWI_GetResult_rb = Fiddle::Function.new(
|
140
|
+
libm['NLPIR_NWI_GetResult'],
|
141
|
+
[Fiddle::TYPE_INT],
|
142
|
+
Fiddle::TYPE_VOIDP
|
142
143
|
)
|
143
144
|
NLPIR_NWI_Result2UserDict_rb = Fiddle::Function.new(
|
144
|
-
|
145
|
-
|
146
|
-
|
145
|
+
libm['NLPIR_NWI_Result2UserDict'],
|
146
|
+
[],
|
147
|
+
Fiddle::TYPE_VOIDP
|
147
148
|
)
|
148
149
|
|
149
150
|
#--函数
|
150
151
|
|
151
|
-
def NLPIR_Init(sInitDirPath=nil , encoding=UTF8_CODE
|
152
|
-
|
153
|
-
if File.exist?(
|
154
|
-
FileUtils.mkdir(
|
152
|
+
def NLPIR_Init(sInitDirPath=nil , encoding=UTF8_CODE)
|
153
|
+
sInitDirPath += "/Data/"
|
154
|
+
if File.exist?(sInitDirPath)==false
|
155
|
+
FileUtils.mkdir(sInitDirPath)
|
155
156
|
filemother = File.expand_path("../Data/", __FILE__)
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
NLPIR_Init_rb.call(sInitDirPath,encoding)
|
165
|
-
|
157
|
+
FileUtils.copy_entry filemother,sInitDirPath
|
158
|
+
end
|
159
|
+
@charset = 'gbk' if encoding == GBK_CODE
|
160
|
+
@charset = 'utf-8' if encoding == UTF8_CODE
|
161
|
+
@charset = 'big5' if encoding == BIG5_CODE
|
162
|
+
@charset = 'gbk' if encoding == GBK_FANTI_CODE
|
163
|
+
NLPIR_Init_rb.call(nil,encoding)
|
166
164
|
end
|
165
|
+
alias :nlpir_init :NLPIR_Init
|
167
166
|
|
168
167
|
def NLPIR_Exit()
|
169
|
-
|
170
|
-
return NLPIR_TRUE if i > 0
|
168
|
+
NLPIR_Exit_rb.call()
|
171
169
|
end
|
170
|
+
alias :nlpir_exit :NLPIR_Exit
|
172
171
|
|
173
172
|
def NLPIR_ImportUserDict(sFilename)
|
174
173
|
NLPIR_ImportUserDict_rb.call(sFilename)
|
175
174
|
end
|
175
|
+
alias :import_userdict :NLPIR_ImportUserDict
|
176
176
|
|
177
177
|
def NLPIR_ParagraphProcess(sParagraph, bPOStagged=NLPIR_TRUE)
|
178
|
-
NLPIR_ParagraphProcess_rb.call(sParagraph, bPOStagged).to_s
|
178
|
+
NLPIR_ParagraphProcess_rb.call(sParagraph, bPOStagged).to_s.force_encoding(@charset)
|
179
179
|
end
|
180
|
+
alias :text_proc :NLPIR_ParagraphProcess
|
180
181
|
|
181
182
|
def NLPIR_ParagraphProcessA(sParagraph)
|
182
183
|
resultCount = NLPIR_GetParagraphProcessAWordCount(sParagraph)
|
@@ -190,86 +191,109 @@ NLPIR_NWI_AddFile_rb = Fiddle::Function.new(
|
|
190
191
|
end
|
191
192
|
return words_list
|
192
193
|
end
|
194
|
+
alias :text_procA :NLPIR_ParagraphProcessA
|
193
195
|
|
194
|
-
|
195
|
-
|
196
|
-
|
196
|
+
def NLPIR_GetParagraphProcessAWordCount(sParagraph)
|
197
|
+
NLPIR_GetParagraphProcessAWordCount_rb.call(sParagraph)
|
198
|
+
end
|
199
|
+
alias :text_wordcount :NLPIR_GetParagraphProcessAWordCount
|
197
200
|
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
+
def NLPIR_FileProcess(sSourceFilename, sResultFilename, bPOStagged=NLPIR_TRUE)
|
202
|
+
NLPIR_FileProcess_rb.call(sSourceFilename, sResultFilename, bPOStagged)
|
203
|
+
end
|
204
|
+
alias :file_proc :NLPIR_FileProcess
|
201
205
|
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
return words_list
|
206
|
+
|
207
|
+
def NLPIR_ParagraphProcessAW(sParagraph)
|
208
|
+
free = Fiddle::Function.new(Fiddle::RUBY_FREE, [TYPE_VOIDP], TYPE_VOID)
|
209
|
+
resultCount = NLPIR_GetParagraphProcessAWordCount(sParagraph)
|
210
|
+
pVecResult = Pointer.malloc(Result_t.size*resultCount,free)
|
211
|
+
NLPIR_ParagraphProcessAW_rb.call(resultCount,pVecResult)
|
212
|
+
words_list = []
|
213
|
+
words_list << Result_t.new(pVecResult)
|
214
|
+
for i in 1...resultCount do
|
215
|
+
words_list << Result_t.new(pVecResult+=Result_t.size)
|
213
216
|
end
|
217
|
+
return words_list
|
218
|
+
end
|
219
|
+
alias :text_procAW :NLPIR_ParagraphProcessAW
|
214
220
|
|
215
|
-
def NLPIR_AddUserWord(sWord)
|
216
|
-
NLPIR_AddUserWord_rb.call(sWord)
|
217
|
-
end
|
218
221
|
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
+
def NLPIR_AddUserWord(sWord)
|
223
|
+
NLPIR_AddUserWord_rb.call(sWord)
|
224
|
+
end
|
225
|
+
alias :add_userword :NLPIR_AddUserWord
|
222
226
|
|
223
|
-
|
224
|
-
|
225
|
-
|
227
|
+
def NLPIR_SaveTheUsrDic()
|
228
|
+
NLPIR_SaveTheUsrDic_rb.call()
|
229
|
+
end
|
230
|
+
alias :save_userdict :NLPIR_SaveTheUsrDic
|
226
231
|
|
227
|
-
|
228
|
-
|
229
|
-
|
232
|
+
def NLPIR_DelUsrWord(sWord)
|
233
|
+
NLPIR_DelUsrWord_rb.call(sWord)
|
234
|
+
end
|
235
|
+
alias :del_userword :NLPIR_DelUsrWord
|
230
236
|
|
231
|
-
|
232
|
-
|
233
|
-
|
237
|
+
def NLPIR_GetKeyWords(sLine, nMaxKeyLimit=50, bWeightOut=NLPIR_FALSE)
|
238
|
+
NLPIR_GetKeyWords_rb.call(sLine, nMaxKeyLimit, bWeightOut).to_s.force_encoding(@charset)
|
239
|
+
end
|
240
|
+
alias :text_keywords :NLPIR_GetKeyWords
|
234
241
|
|
235
|
-
|
236
|
-
|
237
|
-
|
242
|
+
def NLPIR_GetFileKeyWords(sTextFile, nMaxKeyLimit=50, bWeightOut=NLPIR_FALSE)
|
243
|
+
line = NLPIR_GetFileKeyWords_rb.call(sTextFile, nMaxKeyLimit, bWeightOut).to_s
|
244
|
+
line.force_encoding('gbk')
|
245
|
+
line.encode!(@charset)
|
246
|
+
end
|
247
|
+
alias :file_keywords :NLPIR_GetFileKeyWords
|
238
248
|
|
239
|
-
|
240
|
-
|
241
|
-
|
249
|
+
def NLPIR_GetNewWords(sLine, nMaxKeyLimit=50, bWeightOut=NLPIR_FALSE)
|
250
|
+
NLPIR_GetNewWords_rb.call(sLine, nMaxKeyLimit, bWeightOut).to_s.force_encoding(@charset)
|
251
|
+
end
|
252
|
+
alias :text_newwords :NLPIR_GetNewWords
|
242
253
|
|
243
|
-
|
244
|
-
|
245
|
-
|
254
|
+
def NLPIR_GetFileNewWords(sTextFile, nMaxKeyLimit=50, bWeightOut=NLPIR_FALSE)
|
255
|
+
NLPIR_GetFileNewWords_rb.call(sTextFile, nMaxKeyLimit, bWeightOut).to_s.force_encoding(@charset)
|
256
|
+
end
|
257
|
+
alias :file_newwords :NLPIR_GetFileNewWords
|
246
258
|
|
247
|
-
|
248
|
-
|
249
|
-
|
259
|
+
def NLPIR_FingerPrint(sLine)
|
260
|
+
NLPIR_FingerPrint_rb.call(sLine)
|
261
|
+
end
|
262
|
+
alias :text_fingerprint :NLPIR_FingerPrint
|
250
263
|
|
251
|
-
|
252
|
-
|
253
|
-
|
264
|
+
def NLPIR_SetPOSmap(nPOSmap)
|
265
|
+
NLPIR_SetPOSmap_rb.call(nPOSmap)
|
266
|
+
end
|
267
|
+
alias :setPOSmap :NLPIR_SetPOSmap
|
254
268
|
|
255
|
-
|
256
|
-
|
257
|
-
|
269
|
+
def NLPIR_NWI_Start()
|
270
|
+
NLPIR_NWI_Start_rb.call()
|
271
|
+
end
|
272
|
+
alias :NWI_start :NLPIR_NWI_Start
|
258
273
|
|
259
|
-
|
260
|
-
|
261
|
-
|
274
|
+
def NLPIR_NWI_AddFile(sFilename)
|
275
|
+
NLPIR_NWI_AddFile_rb.call(sFilename)
|
276
|
+
end
|
277
|
+
alias :NWI_addfile :NLPIR_NWI_AddFile
|
262
278
|
|
263
|
-
|
264
|
-
|
265
|
-
|
279
|
+
def NLPIR_NWI_AddMem(sFilename)
|
280
|
+
NLPIR_NWI_AddMem_rb.call(sFilename)
|
281
|
+
end
|
282
|
+
alias :NWI_addmem :NLPIR_NWI_AddMem
|
266
283
|
|
267
|
-
|
268
|
-
|
269
|
-
|
284
|
+
def NLPIR_NWI_Complete()
|
285
|
+
NLPIR_NWI_Complete_rb.call()
|
286
|
+
end
|
287
|
+
alias :NWI_complete :NLPIR_NWI_Complete
|
270
288
|
|
271
|
-
|
272
|
-
|
273
|
-
|
289
|
+
def NLPIR_NWI_GetResult( bWeightOut = NLPIR_FALSE)
|
290
|
+
NLPIR_NWI_GetResult_rb.call(bWeightOut)
|
291
|
+
end
|
292
|
+
alias :NWI_result :NLPIR_NWI_GetResult
|
293
|
+
|
294
|
+
def NLPIR_NWI_Result2UserDict()
|
295
|
+
NLPIR_NWI_Result2UserDict_rb.call()
|
296
|
+
end
|
297
|
+
alias :NWI_result2userdict :NLPIR_NWI_Result2UserDict
|
274
298
|
|
275
299
|
end
|