nlpir 0.0.4-x86-mingw32 → 1.0.0-x86-mingw32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +146 -8
- data/bin/NLPIR.dll +0 -0
- data/lib/Data/Configure.xml +16 -15
- data/lib/Data/DocExtractor.user +2 -0
- data/lib/Data/English/English.pdat +0 -0
- data/lib/Data/English/English.pos +0 -0
- data/lib/Data/English/English.ung +0 -0
- data/lib/Data/English/English.wordlist +0 -0
- data/lib/Data/English/Irrel2regular.map +0 -0
- data/lib/Data/English/ne.pdat +0 -0
- data/lib/Data/English/ne.pos +0 -0
- data/lib/Data/English/ne.wordlist +0 -0
- data/lib/Data/FTU8.pdat +0 -0
- data/lib/Data/FTU8.wordlist +0 -0
- data/lib/Data/FTU82GBK.map +0 -0
- data/lib/Data/FieldDict.pdat +0 -0
- data/lib/Data/FieldDict.pos +0 -0
- data/lib/Data/GBK2FTU8.map +0 -0
- data/lib/Data/ICTPOS.map +4 -0
- data/lib/Data/NLPIR.user +0 -0
- data/lib/Data/NewWord.lst +18 -15
- data/lib/Data/PKU.map +4 -0
- data/lib/Data/PKU_First.map +4 -0
- data/lib/Data/UserDict.pdat +0 -0
- data/lib/Data/location.map +0 -0
- data/lib/Data/location.pdat +0 -0
- data/lib/Data/location.wordlist +0 -0
- data/lib/Data/sentiment.pdat +0 -0
- data/lib/Data/sentiment.ung +0 -0
- data/lib/nlpir.rb +187 -163
- data/lib/nlpir/version.rb +1 -1
- data/test/Data/Configure.xml +16 -15
- data/test/Data/DocExtractor.user +2 -0
- data/test/Data/English/English.pdat +0 -0
- data/test/Data/English/English.pos +0 -0
- data/test/Data/English/English.ung +0 -0
- data/test/Data/English/English.wordlist +0 -0
- data/test/Data/English/Irrel2regular.map +0 -0
- data/test/Data/English/ne.pdat +0 -0
- data/test/Data/English/ne.pos +0 -0
- data/test/Data/English/ne.wordlist +0 -0
- data/test/Data/FTU8.pdat +0 -0
- data/test/Data/FTU8.wordlist +0 -0
- data/test/Data/FTU82GBK.map +0 -0
- data/test/Data/GBK2FTU8.map +0 -0
- data/test/Data/ICTPOS.map +4 -0
- data/test/Data/NLPIR.user +0 -0
- data/test/Data/NewWord.lst +18 -63
- data/test/Data/PKU.map +4 -0
- data/test/Data/PKU_First.map +4 -0
- data/test/Data/UserDict.pdat +0 -0
- data/test/Data/location.map +0 -0
- data/test/Data/location.pdat +0 -0
- data/test/Data/location.wordlist +0 -0
- data/test/Data/sentiment.pdat +0 -0
- data/test/Data/sentiment.ung +0 -0
- data/test/findnewword.txt +103 -0
- data/test/test_nlpir.rb +137 -140
- data/test/test_result.txt +52 -35
- data/test/userdict.txt +5 -5
- metadata +59 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 01b964073b5822742a58c2b2c1e438ee599bf466
|
4
|
+
data.tar.gz: a0293eb0eed9928e6ca2b7d4abbbd0fa3572e46a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d76198cc3d39291a3d14c1b79b60a975cefd37a8ffdf50e9afddd2ae0439f1710f990bc79eb9ad1d4e7a9498befd0921abf264c868715c4394db5e65bfa5c8c0
|
7
|
+
data.tar.gz: fd64bb7fd5f1d5baa4686edb1854631f062bc71dc64bfb4305a514c96e35c55c0c60ebce1184d3c3563d3b01f841e6cf8ff3b41bdbc173e2401460d87ab795bb
|
data/README.md
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
# Nlpir_win
|
2
2
|
|
3
|
-
A rubygem wrapper of chinese segment tools
|
3
|
+
A RubyGem wrapper for the Chinese word segmentation tool ICTCLAS2014
|
4
4
|
|
5
|
-
Nlpir version 0.0.4 , gem nlpir-0.0.4-x86-mingw32 support '
|
5
|
+
Nlpir version 0.0.4, gem nlpir-0.0.4-x86-mingw32 supports '~ ruby2.0.0' on Win7. For *nix OSes, use [nlpir](https://github.com/JoeWoo/nlpir)
|
6
6
|
|
7
7
|
## Installation
|
8
8
|
|
@@ -45,8 +45,148 @@ some DEFINE you may use :
|
|
45
45
|
```
|
46
46
|
|
47
47
|
after you gem install it:
|
48
|
+
## ruby-style func
|
49
|
+
```ruby
|
50
|
+
require 'nlpir'
|
51
|
+
include Nlpir
|
48
52
|
|
49
|
-
|
53
|
+
s = "坚定不移沿着中国特色社会主义道路前进 为全面建成小康社会而奋斗"
|
54
|
+
#first of all : Call the NLPIR API nlpir_init
|
55
|
+
|
56
|
+
nlpir_init(File.expand_path("../", __FILE__),UTF8_CODE)
|
57
|
+
|
58
|
+
#example1: Process a paragraph, and return the result text with POS or not
|
59
|
+
puts text_proc(s, NLPIR_TRUE)
|
60
|
+
puts text_proc(s, NLPIR_FALSE)
|
61
|
+
|
62
|
+
#example2: Process a paragraph, and return an array whose elements are POS-tagged words.
|
63
|
+
#tips: text_procA() returns the array; its memory is malloced by NLPIR and will be freed by nlpir_exit() (memory in server)
|
64
|
+
|
65
|
+
words_list = text_procA(s)
|
66
|
+
i=1
|
67
|
+
words_list.each do |a|
|
68
|
+
sWhichDic=""
|
69
|
+
case a.word_type
|
70
|
+
when 0
|
71
|
+
sWhichDic = "核心词典"
|
72
|
+
when 1
|
73
|
+
sWhichDic = "用户词典"
|
74
|
+
when 2
|
75
|
+
sWhichDic = "专业词典"
|
76
|
+
end
|
77
|
+
puts "No.#{i}:start:#{a.start}, length:#{a.length}, POS_ID:#{a.sPOS},word_ID:#{a.word_ID},word_type:#{a.word_type} , UserDefine:#{sWhichDic}, Word:#{s.byteslice(a.start,a.length)}, Weight:#{a.weight}\n"
|
78
|
+
i += 1
|
79
|
+
end
|
80
|
+
|
81
|
+
#example3: Process a paragraph, and return an array whose elements are POS-tagged words.
|
82
|
+
#tips: text_procAW() returns the array; its memory is malloced by ruby::fiddle and is collected by the GC (memory in agent)
|
83
|
+
|
84
|
+
words_list = text_procAW(s)
|
85
|
+
i=1
|
86
|
+
words_list.each do |a|
|
87
|
+
sWhichDic=""
|
88
|
+
case a.word_type
|
89
|
+
when 0
|
90
|
+
sWhichDic = "核心词典"
|
91
|
+
when 1
|
92
|
+
sWhichDic = "用户词典"
|
93
|
+
when 2
|
94
|
+
sWhichDic = "专业词典"
|
95
|
+
end
|
96
|
+
puts "No.#{i}:start:#{a.start}, length:#{a.length}, POS_ID:#{a.sPOS},word_ID:#{a.word_ID},word_type:#{a.word_type} , UserDefine:#{sWhichDic}, Word:#{s.byteslice(a.start,a.length)}, Weight:#{a.weight}\n"
|
97
|
+
i += 1
|
98
|
+
end
|
99
|
+
|
100
|
+
#example4: Process a text file, and write the result text to file
|
101
|
+
puts file_proc("./test.txt", "./test_result.txt", NULL)
|
102
|
+
|
103
|
+
|
104
|
+
#example5: Get ProcessAWordCount, it returns the count of the words
|
105
|
+
puts count = file_wordcount(s)
|
106
|
+
|
107
|
+
|
108
|
+
|
109
|
+
#example6: Add/delete a word in the user dictionary (the user dictionary file is ./Data/UserDict.pdat)
|
110
|
+
puts text_proc("我们都是爱思客")
|
111
|
+
#add a user word
|
112
|
+
add_userword("都是爱思客 n")
|
113
|
+
add_userword("思客 n")
|
114
|
+
add_userword("你是 n")
|
115
|
+
add_userword("都是客 n")
|
116
|
+
add_userword("都是爱 n")
|
117
|
+
puts text_proc("我们都是爱思客")
|
118
|
+
#save the user word to disk
|
119
|
+
save_userdict()
|
120
|
+
puts text_proc("我们都是爱思客")
|
121
|
+
#delete a user word
|
122
|
+
del_userword("都是爱思客")
|
123
|
+
save_userdict()
|
124
|
+
puts text_proc("我们都是爱思客")
|
125
|
+
|
126
|
+
|
127
|
+
#example7: Import user-defined dictionary from a text file. and puts NLPIR result
|
128
|
+
puts text_proc("1989年春夏之交的政治风波1989年政治风波24小时降雪量24小时降雨量863计划ABC防护训练APEC会议BB机BP机C2系统C3I系统C3系统C4ISR系统C4I系统CCITT建议")
|
129
|
+
puts import_userdict("./userdict.txt")
|
130
|
+
#see the example file ./userdict.txt for the user dictionary's format requirements
|
131
|
+
save_userdict()
|
132
|
+
puts text_proc("1989年春夏之交的政治风波1989年政治风波24小时降雪量24小时降雨量863计划ABC防护训练APEC会议BB机BP机C2系统C3I系统C3系统C4ISR系统C4I系统CCITT建议")
|
133
|
+
|
134
|
+
|
135
|
+
#example8: Get keywords of text
|
136
|
+
#2nd parameter is the MaxNumber of keywords
|
137
|
+
#3rd parameter is a switch that controls whether the weights are output
|
138
|
+
puts text_keywords(s, 50,NLPIR_TRUE)
|
139
|
+
|
140
|
+
|
141
|
+
#example9: Get keywords from file
|
142
|
+
puts file_keywords("./test.txt",50, NLPIR_TRUE)
|
143
|
+
|
144
|
+
|
145
|
+
#example10: Find new words from text
|
146
|
+
puts text_newwords(s, 50, NLPIR_TRUE)
|
147
|
+
|
148
|
+
|
149
|
+
#example11: Find new words from file
|
150
|
+
puts file_newwords("./test.txt")
|
151
|
+
|
152
|
+
|
153
|
+
#example12: Extract a finger print from the paragraph
|
154
|
+
puts text_fingerprint(s)
|
155
|
+
|
156
|
+
|
157
|
+
#example13: select which pos map will use
|
158
|
+
#ICT_POS_MAP_FIRST #//计算所一级标注集
|
159
|
+
#ICT_POS_MAP_SECOND #//计算所二级标注集
|
160
|
+
#PKU_POS_MAP_SECOND #//北大二级标注集
|
161
|
+
#PKU_POS_MAP_FIRST #//北大一级标注集
|
162
|
+
setPOSmap(ICT_POS_MAP_FIRST)
|
163
|
+
puts text_proc(s)
|
164
|
+
setPOSmap(PKU_POS_MAP_FIRST)
|
165
|
+
puts text_proc(s)
|
166
|
+
|
167
|
+
|
168
|
+
|
169
|
+
# 新词发现批量处理功能
|
170
|
+
#以下函数为2013版本专门针对新词发现的过程,一般建议脱机实现,不宜在线处理
|
171
|
+
# 新词识别完成后,再自动导入到分词系统中,即可完成
|
172
|
+
|
173
|
+
NWI_start() #启动新词发现功能
|
174
|
+
f=File.new("test.txt", "r")
|
175
|
+
text=f.read
|
176
|
+
NWI_addfile(text)#添加新词训练的文件,可反复添加
|
177
|
+
NWI_complete()#添加文件或者训练内容结束
|
178
|
+
f.close()
|
179
|
+
puts NWI_result()#输出新词识别结果
|
180
|
+
#puts file_proc("a.txt","b.txt")
|
181
|
+
NWI_result2userdict()#新词识别结果导入到用户词典
|
182
|
+
|
183
|
+
|
184
|
+
#at the end, call NLPIR_Exit() to free system resources
|
185
|
+
nlpir_exit()
|
186
|
+
|
187
|
+
|
188
|
+
```
|
189
|
+
## c-style func
|
50
190
|
|
51
191
|
```ruby
|
52
192
|
|
@@ -171,12 +311,9 @@ also can see some examples from test cases [here](https://github.com/JoeWoo/nlpi
|
|
171
311
|
# 新词识别完成后,再自动导入到分词系统中,即可完成
|
172
312
|
|
173
313
|
NLPIR_NWI_Start() #启动新词发现功能
|
174
|
-
|
175
|
-
text=f.read
|
176
|
-
NLPIR_NWI_AddFile(text)#添加新词训练的文件,可反复添加
|
314
|
+
NLPIR_NWI_AddFile("./text.txt")#添加新词训练的文件,可反复添加
|
177
315
|
NLPIR_NWI_Complete()#添加文件或者训练内容结束
|
178
|
-
|
179
|
-
puts NLPIR_NWI_GetResult()#输出新词识别结果
|
316
|
+
puts NLPIR_NWI_GetResult().to_s#输出新词识别结果 可传入一个参数NLPIR_TRUE或NLPIR_FALSE,用于是否输出词性
|
180
317
|
#puts NLPIR_FileProcess("a.txt","b.txt")
|
181
318
|
NLPIR_NWI_Result2UserDict()#新词识别结果导入到用户词典
|
182
319
|
|
@@ -186,6 +323,7 @@ also can see some examples from test cases [here](https://github.com/JoeWoo/nlpi
|
|
186
323
|
|
187
324
|
```
|
188
325
|
|
326
|
+
|
189
327
|
## Contributing
|
190
328
|
|
191
329
|
1. Fork it
|
data/bin/NLPIR.dll
CHANGED
Binary file
|
data/lib/Data/Configure.xml
CHANGED
@@ -1,15 +1,16 @@
|
|
1
|
-
<?xmlversion="1.0"encoding="GB2312"?>
|
2
|
-
<NLPIR>
|
3
|
-
<TagSet>ICTPOS.map</TagSet>//���Ա�ע��ӳ���ļ�
|
4
|
-
<UserDict>on</UserDict>//On��UserDictionaryapplied;Off:notapplied��
|
5
|
-
<UserDictPrior>
|
6
|
-
<FieldDict>
|
7
|
-
<GranularityContorl>off</GranularityContorl>
|
8
|
-
<Log>
|
9
|
-
<version>2013</version>//ϵͳ�汾��
|
10
|
-
<Modify>2012-11-14</Modify>//ϵͳ�����ʱ��
|
11
|
-
<Lexicon>2012-11-14</Lexicon>//�ʵ������ʱ��
|
12
|
-
<
|
13
|
-
<
|
14
|
-
<
|
15
|
-
</
|
1
|
+
<?xmlversion="1.0"encoding="GB2312"?>
|
2
|
+
<NLPIR>
|
3
|
+
<TagSet>ICTPOS.map</TagSet>//���Ա�ע��ӳ���ļ�
|
4
|
+
<UserDict>on</UserDict>//On��UserDictionaryapplied;Off:notapplied��
|
5
|
+
<UserDictPrior>Off</UserDictPrior>//�û��ʵ�����,Addedin2006-03-16,requiredbyNECOn���û��ʵ�ͺ��Ĵʵ���ͬʱ�еĴʻ㣬�û��ʵ����ȣ������ܲ�Ҫ���ã���������Ĵʵ��еĴʶ�����Ϊ�û��ʵ䣬��Ч���ʵ��䷴
|
6
|
+
<FieldDict>on</FieldDict>//On��FieldDictionaryapplied;Off:notapplied��
|
7
|
+
<GranularityContorl>off</GranularityContorl>
|
8
|
+
<Log>Off</Log>//On,Off�����磺Off:�ر���־���ܣ�On:����־����
|
9
|
+
<version>2013</version>//ϵͳ�汾��
|
10
|
+
<Modify>2012-11-14</Modify>//ϵͳ�����ʱ��
|
11
|
+
<Lexicon>2012-11-14</Lexicon>//�ʵ������ʱ��
|
12
|
+
<Sentiment>On</Sentiment>//On,Off�����磺Off:�ر���з������ܣ�On:����з�������
|
13
|
+
<adaptive>true</adaptive>//����Ӧ�ִʣ�Ĭ��Ϊfalse������Ӧ�ִʵ�Ч�ʻ�ϵ�
|
14
|
+
<author>�Ż�ƽ��ʿ</author>//����
|
15
|
+
<Contact>pipy_zhang@msn.com</Contact>//������ϵ��ʽ
|
16
|
+
</NLPIR>
|
@@ -0,0 +1,2 @@
|
|
1
|
+
���ť��ݸܸ��������㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע������@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע�������㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�ábQBw6u|V
|
2
|
+
-rt�����������I+g���蟋�����'"&t#''������̣ռ�܃�����רע�������㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע������@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע�������㡢��Ȼ���Դ���������˛��̷���ޜ��̍�Ӟ�����������ā���و����ҁ��䂜�˃�������ᔆ���Oww.nlpir.org.��ע������@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע�������㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��עЮ������. /=-++<���ɩв�ʿ�Ż�ƽ��ʿרע�������㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע������@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע�������㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע������@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע�������㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע������@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿ����������㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע������@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע�������㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����Fww.nlpir.org.��ע������@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע�������㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע������@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע�������㡢��Ȼ���Դ�������Ϣ��������ϓ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע������@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע�������㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע������@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע�������㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע������@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע�������㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע������@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע�������㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע������@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע�������㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע������@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע�������㡢��Ȼ���Դ������
�Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע������@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע�������㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע������@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע�������㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ����������������順�ӧ����������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע������@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע�������㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע������@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע�������㡢��Ȼ���Դ���
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
data/lib/Data/FTU8.pdat
ADDED
Binary file
|
Binary file
|
Binary file
|
data/lib/Data/FieldDict.pdat
CHANGED
Binary file
|
data/lib/Data/FieldDict.pos
CHANGED
Binary file
|
Binary file
|
data/lib/Data/ICTPOS.map
CHANGED
data/lib/Data/NLPIR.user
CHANGED
Binary file
|
data/lib/Data/NewWord.lst
CHANGED
@@ -1,25 +1,28 @@
|
|
1
|
+
|
1
2
|
��˿ ���� � ��Q �������Ļ� ���� Ⱥ���Գ� ���ݱ�
|
3
|
+
����ɽ �ΰ������� ���������� �������� ��Ʒ���� ��������
|
4
|
+
�й���ɫ������� ������������ ��ѧ��չ�� ��̬���� ��ṫƽ���� �������� �л�����ΰ���� ���幦���� �����ռ俪�� ��������� ���緢չһ�廯 С����� �������� ��������Ļ�ǿ�� ��ҵ��λ ���� ����ѡ�� �������� ���� ��������г��������� ��������� �������� ȫ������ ��Ҫս�Ի��� ȡ���ش��չ ��̬ϵͳ �Ȳ��ƽ� ���� �ӿ�ת�� ���ʽ���Σ�� ���¶����� ������������ ��������Ƚ��Ļ� ũҵ�ۺ��������� ʳƷҩƷ��ȫ ��۵�����ϵ ����ͬԸ�� �������� ˾������ �������ϱ��� ȫ���л���Ů ս�������˲�ҵ ����Ϸ�Ȩ�� ȫ������ͬ ������������ ��ǰ����ƽ�� �Ļ���ʵ�� ��֤�������� ��������ϵ ȫ��ҽ�� ����ֿ� ����������չ �����Ч �����ƶȸĸ� �������� ����δ�� ʵ�徭�� ��ѧ��ˮƽ ���㵳�� ���Ȼ� ȫ�潨��С����� �������� ��ʳ��ȫ ����ʵ�� ����ɲ� �������� �������� ���ر��� ����ڵ� ��ѧ�ش� �������� ���������� ��������� ��ȫ��в ��������� �����Ļ� ������� ����Ӱ���� �Ծ����� �Ͷ����� �ִ�����ҵ �������� ���˹�ͬ�� ��ʵ�ƽ� �Ҹ����� ���ļ�ֵ�� ���η��� ����������� ������ ƽ�Ȼ��� ������ҡ��� ����� ��Ҫս�Ի����� ת�侭�÷�չ��ʽ �ӿ�ת�侭�÷�չ��ʽ ���������ļ�ֵ��ϵ ����ȡ���ش��չ ʵ���л�����ΰ���� �����ں�ʽ��չ ��������������Ȼ� �ĸ↑�� ά����ṫƽ����
|
2
5
|
��˿ ���� � ��Q �������Ļ� ���� Ⱥ���Գ� ���ݱ�
|
6
|
+
��˿ ���� � ��˿�Ļ� ��Q �������Ļ� Ⱥ���Գ� ���ݱ� ��˧
|
7
|
+
��˿#����#�#��˿�Ļ�#��Q#�������Ļ�#Ⱥ���Գ�#���ݱ�#��˧#
|
3
8
|
|
4
9
|
|
5
10
|
|
6
11
|
|
7
12
|
|
8
13
|
|
14
|
+
������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
|
15
|
+
������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
|
9
16
|
|
10
17
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
18
|
+
��˿#����#�#��˿�Ļ�#��Q#�������Ļ�#Ⱥ���Գ�#���ݱ�#
|
19
|
+
������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
|
20
|
+
��˿#����#�#��˿�Ļ�#��Q#�������Ļ�#Ⱥ���Գ�#���ݱ�#
|
21
|
+
������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
|
22
|
+
��˿#����#�#��˿�Ļ�#��Q#�������Ļ�#Ⱥ���Գ�#���ݱ�#
|
23
|
+
������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
|
24
|
+
������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
|
25
|
+
������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
|
26
|
+
������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
|
27
|
+
��˿#����#�#��˿�Ļ�#��Q#�������Ļ�#Ⱥ���Գ�#���ݱ�#
|
28
|
+
��˿#����#�#��˿�Ļ�#��Q#�������Ļ�#Ⱥ���Գ�#���ݱ�#
|
data/lib/Data/PKU.map
CHANGED
data/lib/Data/PKU_First.map
CHANGED
data/lib/Data/UserDict.pdat
CHANGED
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
data/lib/nlpir.rb
CHANGED
@@ -3,7 +3,7 @@ require File.expand_path("../nlpir/version", __FILE__)
|
|
3
3
|
require 'fiddle'
|
4
4
|
require 'fiddle/struct'
|
5
5
|
require 'fiddle/import'
|
6
|
-
require 'fileutils'
|
6
|
+
require 'fileutils'
|
7
7
|
include Fiddle::CParser
|
8
8
|
include Fiddle::Importer
|
9
9
|
|
@@ -14,169 +14,170 @@ module Nlpir
|
|
14
14
|
ICT_POS_MAP_FIRST = 1 #计算所一级标注集
|
15
15
|
ICT_POS_MAP_SECOND = 0 #计算所二级标注集
|
16
16
|
PKU_POS_MAP_SECOND = 2 #北大二级标注集
|
17
|
-
PKU_POS_MAP_FIRST = 3
|
17
|
+
PKU_POS_MAP_FIRST = 3 #北大一级标注集
|
18
18
|
POS_SIZE = 40
|
19
19
|
|
20
20
|
Result_t = struct ['int start','int length',"char sPOS[#{POS_SIZE}]",'int iPOS',
|
21
|
-
|
22
|
-
|
21
|
+
'int word_ID','int word_type','int weight']
|
22
|
+
|
23
23
|
GBK_CODE = 0 #默认支持GBK编码
|
24
24
|
UTF8_CODE = GBK_CODE + 1 #UTF8编码
|
25
25
|
BIG5_CODE = GBK_CODE + 2 #BIG5编码
|
26
26
|
GBK_FANTI_CODE = GBK_CODE + 3 #GBK编码,里面包含繁体字
|
27
27
|
|
28
|
+
@charset = 'utf-8'
|
28
29
|
|
29
30
|
#提取链接库接口
|
30
31
|
libm = Fiddle.dlopen(File.expand_path("../../bin/NLPIR.dll", __FILE__))
|
31
32
|
|
32
33
|
NLPIR_Init_rb = Fiddle::Function.new(
|
33
|
-
|
34
|
-
|
35
|
-
|
34
|
+
libm['NLPIR_Init'],
|
35
|
+
[Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT],
|
36
|
+
Fiddle::TYPE_INT
|
36
37
|
)
|
37
|
-
NLPIR_Exit_rb = Fiddle::Function.new(
|
38
|
-
|
39
|
-
|
40
|
-
|
38
|
+
NLPIR_Exit_rb = Fiddle::Function.new(
|
39
|
+
libm['NLPIR_Exit'],
|
40
|
+
[],
|
41
|
+
Fiddle::TYPE_INT
|
41
42
|
)
|
42
|
-
NLPIR_ImportUserDict_rb = Fiddle::Function.new(
|
43
|
-
|
44
|
-
|
45
|
-
|
43
|
+
NLPIR_ImportUserDict_rb = Fiddle::Function.new(
|
44
|
+
libm['NLPIR_ImportUserDict'],
|
45
|
+
[Fiddle::TYPE_VOIDP],
|
46
|
+
Fiddle::TYPE_INT
|
46
47
|
)
|
47
|
-
NLPIR_ParagraphProcess_rb = Fiddle::Function.new(
|
48
|
-
|
49
|
-
|
50
|
-
|
48
|
+
NLPIR_ParagraphProcess_rb = Fiddle::Function.new(
|
49
|
+
libm['NLPIR_ParagraphProcess'],
|
50
|
+
[Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT],
|
51
|
+
Fiddle::TYPE_VOIDP
|
51
52
|
)
|
52
|
-
NLPIR_ParagraphProcessA_rb = Fiddle::Function.new(
|
53
|
-
|
54
|
-
|
55
|
-
|
53
|
+
NLPIR_ParagraphProcessA_rb = Fiddle::Function.new(
|
54
|
+
libm['NLPIR_ParagraphProcessA'],
|
55
|
+
[Fiddle::TYPE_VOIDP,Fiddle::TYPE_VOIDP],
|
56
|
+
Fiddle::TYPE_VOIDP
|
56
57
|
)
|
57
|
-
NLPIR_FileProcess_rb = Fiddle::Function.new(
|
58
|
-
|
59
|
-
|
60
|
-
|
58
|
+
NLPIR_FileProcess_rb = Fiddle::Function.new(
|
59
|
+
libm['NLPIR_FileProcess'],
|
60
|
+
[Fiddle::TYPE_VOIDP,Fiddle::TYPE_VOIDP, Fiddle::TYPE_INT],
|
61
|
+
Fiddle::TYPE_DOUBLE
|
61
62
|
)
|
62
|
-
NLPIR_GetParagraphProcessAWordCount_rb = Fiddle::Function.new(
|
63
|
-
|
64
|
-
|
65
|
-
|
63
|
+
NLPIR_GetParagraphProcessAWordCount_rb = Fiddle::Function.new(
|
64
|
+
libm['NLPIR_GetParagraphProcessAWordCount'],
|
65
|
+
[Fiddle::TYPE_VOIDP],
|
66
|
+
Fiddle::TYPE_INT
|
66
67
|
)
|
67
|
-
NLPIR_ParagraphProcessAW_rb = Fiddle::Function.new(
|
68
|
-
|
69
|
-
|
70
|
-
|
68
|
+
NLPIR_ParagraphProcessAW_rb = Fiddle::Function.new(
|
69
|
+
libm['NLPIR_ParagraphProcessAW'],
|
70
|
+
[Fiddle::TYPE_INT,Fiddle::TYPE_VOIDP],
|
71
|
+
Fiddle::TYPE_INT
|
71
72
|
)
|
72
|
-
NLPIR_AddUserWord_rb = Fiddle::Function.new(
|
73
|
-
|
74
|
-
|
75
|
-
|
73
|
+
NLPIR_AddUserWord_rb = Fiddle::Function.new(
|
74
|
+
libm['NLPIR_AddUserWord'],
|
75
|
+
[Fiddle::TYPE_VOIDP],
|
76
|
+
Fiddle::TYPE_INT
|
76
77
|
)
|
77
|
-
NLPIR_SaveTheUsrDic_rb = Fiddle::Function.new(
|
78
|
-
|
79
|
-
|
80
|
-
|
78
|
+
NLPIR_SaveTheUsrDic_rb = Fiddle::Function.new(
|
79
|
+
libm['NLPIR_SaveTheUsrDic'],
|
80
|
+
[],
|
81
|
+
Fiddle::TYPE_INT
|
81
82
|
)
|
82
|
-
NLPIR_DelUsrWord_rb = Fiddle::Function.new(
|
83
|
-
|
84
|
-
|
85
|
-
|
83
|
+
NLPIR_DelUsrWord_rb = Fiddle::Function.new(
|
84
|
+
libm['NLPIR_DelUsrWord'],
|
85
|
+
[Fiddle::TYPE_VOIDP],
|
86
|
+
Fiddle::TYPE_INT
|
86
87
|
)
|
87
|
-
NLPIR_GetKeyWords_rb = Fiddle::Function.new(
|
88
|
-
libm['
|
88
|
+
NLPIR_GetKeyWords_rb = Fiddle::Function.new(
|
89
|
+
libm['NLPIR_GetKeyWords'],
|
89
90
|
[Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT,Fiddle::TYPE_INT],
|
90
91
|
Fiddle::TYPE_VOIDP
|
91
92
|
)
|
92
|
-
NLPIR_GetFileKeyWords_rb = Fiddle::Function.new(
|
93
|
-
libm['
|
93
|
+
NLPIR_GetFileKeyWords_rb = Fiddle::Function.new(
|
94
|
+
libm['NLPIR_GetFileKeyWords'],
|
94
95
|
[Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT,Fiddle::TYPE_INT],
|
95
96
|
Fiddle::TYPE_VOIDP
|
96
97
|
)
|
97
|
-
NLPIR_GetNewWords_rb = Fiddle::Function.new(
|
98
|
-
libm['
|
98
|
+
NLPIR_GetNewWords_rb = Fiddle::Function.new(
|
99
|
+
libm['NLPIR_GetNewWords'],
|
99
100
|
[Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT,Fiddle::TYPE_INT],
|
100
101
|
Fiddle::TYPE_VOIDP
|
101
102
|
)
|
102
|
-
NLPIR_GetFileNewWords_rb = Fiddle::Function.new(
|
103
|
-
libm['
|
103
|
+
NLPIR_GetFileNewWords_rb = Fiddle::Function.new(
|
104
|
+
libm['NLPIR_GetFileNewWords'],
|
104
105
|
[Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT,Fiddle::TYPE_INT],
|
105
106
|
Fiddle::TYPE_VOIDP
|
106
107
|
)
|
107
|
-
NLPIR_FingerPrint_rb = Fiddle::Function.new(
|
108
|
-
libm['
|
108
|
+
NLPIR_FingerPrint_rb = Fiddle::Function.new(
|
109
|
+
libm['NLPIR_FingerPrint'],
|
109
110
|
[Fiddle::TYPE_VOIDP],
|
110
111
|
Fiddle::TYPE_LONG
|
111
112
|
)
|
112
|
-
NLPIR_SetPOSmap_rb = Fiddle::Function.new(
|
113
|
-
|
114
|
-
|
115
|
-
|
113
|
+
NLPIR_SetPOSmap_rb = Fiddle::Function.new(
|
114
|
+
libm['NLPIR_SetPOSmap'],
|
115
|
+
[Fiddle::TYPE_INT],
|
116
|
+
Fiddle::TYPE_INT
|
116
117
|
)
|
117
118
|
|
118
|
-
NLPIR_NWI_Start_rb = Fiddle::Function.new(
|
119
|
-
|
120
|
-
|
121
|
-
|
119
|
+
NLPIR_NWI_Start_rb = Fiddle::Function.new(
|
120
|
+
libm['NLPIR_NWI_Start'],
|
121
|
+
[],
|
122
|
+
Fiddle::TYPE_INT
|
122
123
|
)
|
123
|
-
NLPIR_NWI_AddFile_rb = Fiddle::Function.new(
|
124
|
-
|
125
|
-
|
126
|
-
|
124
|
+
NLPIR_NWI_AddFile_rb = Fiddle::Function.new(
|
125
|
+
libm['NLPIR_NWI_AddFile'],
|
126
|
+
[Fiddle::TYPE_VOIDP],
|
127
|
+
Fiddle::TYPE_INT
|
127
128
|
)
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
129
|
+
NLPIR_NWI_AddMem_rb = Fiddle::Function.new(
|
130
|
+
libm['NLPIR_NWI_AddMem'],
|
131
|
+
[Fiddle::TYPE_VOIDP],
|
132
|
+
Fiddle::TYPE_INT
|
132
133
|
)
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
134
|
+
NLPIR_NWI_Complete_rb = Fiddle::Function.new(
|
135
|
+
libm['NLPIR_NWI_Complete'],
|
136
|
+
[],
|
137
|
+
Fiddle::TYPE_INT
|
137
138
|
)
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
139
|
+
NLPIR_NWI_GetResult_rb = Fiddle::Function.new(
|
140
|
+
libm['NLPIR_NWI_GetResult'],
|
141
|
+
[Fiddle::TYPE_INT],
|
142
|
+
Fiddle::TYPE_VOIDP
|
142
143
|
)
|
143
144
|
NLPIR_NWI_Result2UserDict_rb = Fiddle::Function.new(
|
144
|
-
|
145
|
-
|
146
|
-
|
145
|
+
libm['NLPIR_NWI_Result2UserDict'],
|
146
|
+
[],
|
147
|
+
Fiddle::TYPE_VOIDP
|
147
148
|
)
|
148
149
|
|
149
150
|
#--函数
|
150
151
|
|
151
|
-
def NLPIR_Init(sInitDirPath=nil , encoding=UTF8_CODE
|
152
|
-
|
153
|
-
if File.exist?(
|
154
|
-
FileUtils.mkdir(
|
152
|
+
def NLPIR_Init(sInitDirPath=nil , encoding=UTF8_CODE)
|
153
|
+
sInitDirPath += "/Data/"
|
154
|
+
if File.exist?(sInitDirPath)==false
|
155
|
+
FileUtils.mkdir(sInitDirPath)
|
155
156
|
filemother = File.expand_path("../Data/", __FILE__)
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
NLPIR_Init_rb.call(sInitDirPath,encoding)
|
165
|
-
|
157
|
+
FileUtils.copy_entry filemother,sInitDirPath
|
158
|
+
end
|
159
|
+
@charset = 'gbk' if encoding == GBK_CODE
|
160
|
+
@charset = 'utf-8' if encoding == UTF8_CODE
|
161
|
+
@charset = 'big5' if encoding == BIG5_CODE
|
162
|
+
@charset = 'gbk' if encoding == GBK_FANTI_CODE
|
163
|
+
NLPIR_Init_rb.call(nil,encoding)
|
166
164
|
end
|
165
|
+
alias :nlpir_init :NLPIR_Init
|
167
166
|
|
168
167
|
def NLPIR_Exit()
|
169
|
-
|
170
|
-
return NLPIR_TRUE if i > 0
|
168
|
+
NLPIR_Exit_rb.call()
|
171
169
|
end
|
170
|
+
alias :nlpir_exit :NLPIR_Exit
|
172
171
|
|
173
172
|
def NLPIR_ImportUserDict(sFilename)
|
174
173
|
NLPIR_ImportUserDict_rb.call(sFilename)
|
175
174
|
end
|
175
|
+
alias :import_userdict :NLPIR_ImportUserDict
|
176
176
|
|
177
177
|
def NLPIR_ParagraphProcess(sParagraph, bPOStagged=NLPIR_TRUE)
|
178
|
-
NLPIR_ParagraphProcess_rb.call(sParagraph, bPOStagged).to_s
|
178
|
+
NLPIR_ParagraphProcess_rb.call(sParagraph, bPOStagged).to_s.force_encoding(@charset)
|
179
179
|
end
|
180
|
+
alias :text_proc :NLPIR_ParagraphProcess
|
180
181
|
|
181
182
|
def NLPIR_ParagraphProcessA(sParagraph)
|
182
183
|
resultCount = NLPIR_GetParagraphProcessAWordCount(sParagraph)
|
@@ -190,86 +191,109 @@ NLPIR_NWI_AddFile_rb = Fiddle::Function.new(
|
|
190
191
|
end
|
191
192
|
return words_list
|
192
193
|
end
|
194
|
+
alias :text_procA :NLPIR_ParagraphProcessA
|
193
195
|
|
194
|
-
|
195
|
-
|
196
|
-
|
196
|
+
def NLPIR_GetParagraphProcessAWordCount(sParagraph)
|
197
|
+
NLPIR_GetParagraphProcessAWordCount_rb.call(sParagraph)
|
198
|
+
end
|
199
|
+
alias :text_wordcount :NLPIR_GetParagraphProcessAWordCount
|
197
200
|
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
+
def NLPIR_FileProcess(sSourceFilename, sResultFilename, bPOStagged=NLPIR_TRUE)
|
202
|
+
NLPIR_FileProcess_rb.call(sSourceFilename, sResultFilename, bPOStagged)
|
203
|
+
end
|
204
|
+
alias :file_proc :NLPIR_FileProcess
|
201
205
|
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
return words_list
|
206
|
+
|
207
|
+
def NLPIR_ParagraphProcessAW(sParagraph)
|
208
|
+
free = Fiddle::Function.new(Fiddle::RUBY_FREE, [TYPE_VOIDP], TYPE_VOID)
|
209
|
+
resultCount = NLPIR_GetParagraphProcessAWordCount(sParagraph)
|
210
|
+
pVecResult = Pointer.malloc(Result_t.size*resultCount,free)
|
211
|
+
NLPIR_ParagraphProcessAW_rb.call(resultCount,pVecResult)
|
212
|
+
words_list = []
|
213
|
+
words_list << Result_t.new(pVecResult)
|
214
|
+
for i in 1...resultCount do
|
215
|
+
words_list << Result_t.new(pVecResult+=Result_t.size)
|
213
216
|
end
|
217
|
+
return words_list
|
218
|
+
end
|
219
|
+
alias :text_procAW :NLPIR_ParagraphProcessAW
|
214
220
|
|
215
|
-
def NLPIR_AddUserWord(sWord)
|
216
|
-
NLPIR_AddUserWord_rb.call(sWord)
|
217
|
-
end
|
218
221
|
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
+
def NLPIR_AddUserWord(sWord)
|
223
|
+
NLPIR_AddUserWord_rb.call(sWord)
|
224
|
+
end
|
225
|
+
alias :add_userword :NLPIR_AddUserWord
|
222
226
|
|
223
|
-
|
224
|
-
|
225
|
-
|
227
|
+
def NLPIR_SaveTheUsrDic()
|
228
|
+
NLPIR_SaveTheUsrDic_rb.call()
|
229
|
+
end
|
230
|
+
alias :save_userdict :NLPIR_SaveTheUsrDic
|
226
231
|
|
227
|
-
|
228
|
-
|
229
|
-
|
232
|
+
def NLPIR_DelUsrWord(sWord)
|
233
|
+
NLPIR_DelUsrWord_rb.call(sWord)
|
234
|
+
end
|
235
|
+
alias :del_userword :NLPIR_DelUsrWord
|
230
236
|
|
231
|
-
|
232
|
-
|
233
|
-
|
237
|
+
def NLPIR_GetKeyWords(sLine, nMaxKeyLimit=50, bWeightOut=NLPIR_FALSE)
|
238
|
+
NLPIR_GetKeyWords_rb.call(sLine, nMaxKeyLimit, bWeightOut).to_s.force_encoding(@charset)
|
239
|
+
end
|
240
|
+
alias :text_keywords :NLPIR_GetKeyWords
|
234
241
|
|
235
|
-
|
236
|
-
|
237
|
-
|
242
|
+
def NLPIR_GetFileKeyWords(sTextFile, nMaxKeyLimit=50, bWeightOut=NLPIR_FALSE)
|
243
|
+
line = NLPIR_GetFileKeyWords_rb.call(sTextFile, nMaxKeyLimit, bWeightOut).to_s
|
244
|
+
line.force_encoding('gbk')
|
245
|
+
line.encode!(@charset)
|
246
|
+
end
|
247
|
+
alias :file_keywords :NLPIR_GetFileKeyWords
|
238
248
|
|
239
|
-
|
240
|
-
|
241
|
-
|
249
|
+
def NLPIR_GetNewWords(sLine, nMaxKeyLimit=50, bWeightOut=NLPIR_FALSE)
|
250
|
+
NLPIR_GetNewWords_rb.call(sLine, nMaxKeyLimit, bWeightOut).to_s.force_encoding(@charset)
|
251
|
+
end
|
252
|
+
alias :text_newwords :NLPIR_GetNewWords
|
242
253
|
|
243
|
-
|
244
|
-
|
245
|
-
|
254
|
+
def NLPIR_GetFileNewWords(sTextFile, nMaxKeyLimit=50, bWeightOut=NLPIR_FALSE)
|
255
|
+
NLPIR_GetFileNewWords_rb.call(sTextFile, nMaxKeyLimit, bWeightOut).to_s.force_encoding(@charset)
|
256
|
+
end
|
257
|
+
alias :file_newwords :NLPIR_GetFileNewWords
|
246
258
|
|
247
|
-
|
248
|
-
|
249
|
-
|
259
|
+
def NLPIR_FingerPrint(sLine)
|
260
|
+
NLPIR_FingerPrint_rb.call(sLine)
|
261
|
+
end
|
262
|
+
alias :text_fingerprint :NLPIR_FingerPrint
|
250
263
|
|
251
|
-
|
252
|
-
|
253
|
-
|
264
|
+
def NLPIR_SetPOSmap(nPOSmap)
|
265
|
+
NLPIR_SetPOSmap_rb.call(nPOSmap)
|
266
|
+
end
|
267
|
+
alias :setPOSmap :NLPIR_SetPOSmap
|
254
268
|
|
255
|
-
|
256
|
-
|
257
|
-
|
269
|
+
def NLPIR_NWI_Start()
|
270
|
+
NLPIR_NWI_Start_rb.call()
|
271
|
+
end
|
272
|
+
alias :NWI_start :NLPIR_NWI_Start
|
258
273
|
|
259
|
-
|
260
|
-
|
261
|
-
|
274
|
+
def NLPIR_NWI_AddFile(sFilename)
|
275
|
+
NLPIR_NWI_AddFile_rb.call(sFilename)
|
276
|
+
end
|
277
|
+
alias :NWI_addfile :NLPIR_NWI_AddFile
|
262
278
|
|
263
|
-
|
264
|
-
|
265
|
-
|
279
|
+
def NLPIR_NWI_AddMem(sFilename)
|
280
|
+
NLPIR_NWI_AddMem_rb.call(sFilename)
|
281
|
+
end
|
282
|
+
alias :NWI_addmem :NLPIR_NWI_AddMem
|
266
283
|
|
267
|
-
|
268
|
-
|
269
|
-
|
284
|
+
def NLPIR_NWI_Complete()
|
285
|
+
NLPIR_NWI_Complete_rb.call()
|
286
|
+
end
|
287
|
+
alias :NWI_complete :NLPIR_NWI_Complete
|
270
288
|
|
271
|
-
|
272
|
-
|
273
|
-
|
289
|
+
def NLPIR_NWI_GetResult( bWeightOut = NLPIR_FALSE)
|
290
|
+
NLPIR_NWI_GetResult_rb.call(bWeightOut)
|
291
|
+
end
|
292
|
+
alias :NWI_result :NLPIR_NWI_GetResult
|
293
|
+
|
294
|
+
def NLPIR_NWI_Result2UserDict()
|
295
|
+
NLPIR_NWI_Result2UserDict_rb.call()
|
296
|
+
end
|
297
|
+
alias :NWI_result2userdict :NLPIR_NWI_Result2UserDict
|
274
298
|
|
275
299
|
end
|