nlpir 0.0.4-x86-mingw32 → 1.0.0-x86-mingw32

Sign up to get free protection for your applications and to get access to all the features.
Files changed (62) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +146 -8
  3. data/bin/NLPIR.dll +0 -0
  4. data/lib/Data/Configure.xml +16 -15
  5. data/lib/Data/DocExtractor.user +2 -0
  6. data/lib/Data/English/English.pdat +0 -0
  7. data/lib/Data/English/English.pos +0 -0
  8. data/lib/Data/English/English.ung +0 -0
  9. data/lib/Data/English/English.wordlist +0 -0
  10. data/lib/Data/English/Irrel2regular.map +0 -0
  11. data/lib/Data/English/ne.pdat +0 -0
  12. data/lib/Data/English/ne.pos +0 -0
  13. data/lib/Data/English/ne.wordlist +0 -0
  14. data/lib/Data/FTU8.pdat +0 -0
  15. data/lib/Data/FTU8.wordlist +0 -0
  16. data/lib/Data/FTU82GBK.map +0 -0
  17. data/lib/Data/FieldDict.pdat +0 -0
  18. data/lib/Data/FieldDict.pos +0 -0
  19. data/lib/Data/GBK2FTU8.map +0 -0
  20. data/lib/Data/ICTPOS.map +4 -0
  21. data/lib/Data/NLPIR.user +0 -0
  22. data/lib/Data/NewWord.lst +18 -15
  23. data/lib/Data/PKU.map +4 -0
  24. data/lib/Data/PKU_First.map +4 -0
  25. data/lib/Data/UserDict.pdat +0 -0
  26. data/lib/Data/location.map +0 -0
  27. data/lib/Data/location.pdat +0 -0
  28. data/lib/Data/location.wordlist +0 -0
  29. data/lib/Data/sentiment.pdat +0 -0
  30. data/lib/Data/sentiment.ung +0 -0
  31. data/lib/nlpir.rb +187 -163
  32. data/lib/nlpir/version.rb +1 -1
  33. data/test/Data/Configure.xml +16 -15
  34. data/test/Data/DocExtractor.user +2 -0
  35. data/test/Data/English/English.pdat +0 -0
  36. data/test/Data/English/English.pos +0 -0
  37. data/test/Data/English/English.ung +0 -0
  38. data/test/Data/English/English.wordlist +0 -0
  39. data/test/Data/English/Irrel2regular.map +0 -0
  40. data/test/Data/English/ne.pdat +0 -0
  41. data/test/Data/English/ne.pos +0 -0
  42. data/test/Data/English/ne.wordlist +0 -0
  43. data/test/Data/FTU8.pdat +0 -0
  44. data/test/Data/FTU8.wordlist +0 -0
  45. data/test/Data/FTU82GBK.map +0 -0
  46. data/test/Data/GBK2FTU8.map +0 -0
  47. data/test/Data/ICTPOS.map +4 -0
  48. data/test/Data/NLPIR.user +0 -0
  49. data/test/Data/NewWord.lst +18 -63
  50. data/test/Data/PKU.map +4 -0
  51. data/test/Data/PKU_First.map +4 -0
  52. data/test/Data/UserDict.pdat +0 -0
  53. data/test/Data/location.map +0 -0
  54. data/test/Data/location.pdat +0 -0
  55. data/test/Data/location.wordlist +0 -0
  56. data/test/Data/sentiment.pdat +0 -0
  57. data/test/Data/sentiment.ung +0 -0
  58. data/test/findnewword.txt +103 -0
  59. data/test/test_nlpir.rb +137 -140
  60. data/test/test_result.txt +52 -35
  61. data/test/userdict.txt +5 -5
  62. metadata +59 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 65987eefe0d616b08e0f6659c43cd8b79469dab1
4
- data.tar.gz: 9fe53a61bea4bd9a877665c6f4edc096c5f80365
3
+ metadata.gz: 01b964073b5822742a58c2b2c1e438ee599bf466
4
+ data.tar.gz: a0293eb0eed9928e6ca2b7d4abbbd0fa3572e46a
5
5
  SHA512:
6
- metadata.gz: b08f5d4d63371d12f7a73bf810ab97f8e2288d52e4a60913c29ba8797c55567d6c5a96b95a243079e0ef57cb8a15a5e7188a954f37c7588d6314bb558ecc1367
7
- data.tar.gz: b48b25cee53d3b7158acce346bd608eec8ce97628949d3740f3c133bee7efa86cfe53ba2fe1b0710e1d4c91472593aeef2f0f8aaa930e3c8b7bf8878e9953943
6
+ metadata.gz: d76198cc3d39291a3d14c1b79b60a975cefd37a8ffdf50e9afddd2ae0439f1710f990bc79eb9ad1d4e7a9498befd0921abf264c868715c4394db5e65bfa5c8c0
7
+ data.tar.gz: fd64bb7fd5f1d5baa4686edb1854631f062bc71dc64bfb4305a514c96e35c55c0c60ebce1184d3c3563d3b01f841e6cf8ff3b41bdbc173e2401460d87ab795bb
data/README.md CHANGED
@@ -1,8 +1,8 @@
1
1
  # Nlpir_win
2
2
 
3
- A rubygem wrapper of chinese segment tools ICTCLAS2013
3
+ A RubyGem wrapper for the Chinese word segmentation tool ICTCLAS2014
4
4
 
5
- Nlpir version 0.0.4 , gem nlpir-0.0.4-x86-mingw32 support '>=ruby2.0.0' on win7.
5
+ Nlpir version 0.0.4, gem nlpir-0.0.4-x86-mingw32 supports '~ ruby2.0.0' on win7. For *nix OSes, use [nlpir](https://github.com/JoeWoo/nlpir)
6
6
 
7
7
  ## Installation
8
8
 
@@ -45,8 +45,148 @@ some DEFINE you may use :
45
45
  ```
46
46
 
47
47
  after you gem install it:
48
+ ## ruby-style func
49
+ ```ruby
50
+ require 'nlpir'
51
+ include Nlpir
48
52
 
49
- also can see some examples from test cases [here](https://github.com/JoeWoo/nlpir_win/blob/master/test/test_nlpir.rb)
53
+ s = "坚定不移沿着中国特色社会主义道路前进 为全面建成小康社会而奋斗"
54
+ #first of all : Call the NLPIR API nlpir_init
55
+
56
+ nlpir_init(File.expand_path("../", __FILE__),UTF8_CODE)
57
+
58
+ #example1: Process a paragraph, and return the result text with POS or not
59
+ puts text_proc(s, NLPIR_TRUE)
60
+ puts text_proc(s, NLPIR_FALSE)
61
+
62
+ #example2: Process a paragraph, and return an array whose elements are POS-tagged words.
63
+ #tips: text_procA() returns the array; its memory is allocated by NLPIR and will be freed by nlpir_exit() (memory in server)
64
+
65
+ words_list = text_procA(s)
66
+ i=1
67
+ words_list.each do |a|
68
+ sWhichDic=""
69
+ case a.word_type
70
+ when 0
71
+ sWhichDic = "核心词典"
72
+ when 1
73
+ sWhichDic = "用户词典"
74
+ when 2
75
+ sWhichDic = "专业词典"
76
+ end
77
+ puts "No.#{i}:start:#{a.start}, length:#{a.length}, POS_ID:#{a.sPOS},word_ID:#{a.word_ID},word_type:#{a.word_type} , UserDefine:#{sWhichDic}, Word:#{s.byteslice(a.start,a.length)}, Weight:#{a.weight}\n"
78
+ i += 1
79
+ end
80
+
81
+ #example3: Process a paragraph, and return an array whose elements are POS-tagged words.
82
+ #tips: text_procAW() returns the array; its memory is allocated by ruby::fiddle and is collected by the GC (memory in agent)
83
+
84
+ words_list = text_procAW(s)
85
+ i=1
86
+ words_list.each do |a|
87
+ sWhichDic=""
88
+ case a.word_type
89
+ when 0
90
+ sWhichDic = "核心词典"
91
+ when 1
92
+ sWhichDic = "用户词典"
93
+ when 2
94
+ sWhichDic = "专业词典"
95
+ end
96
+ puts "No.#{i}:start:#{a.start}, length:#{a.length}, POS_ID:#{a.sPOS},word_ID:#{a.word_ID},word_type:#{a.word_type} , UserDefine:#{sWhichDic}, Word:#{s.byteslice(a.start,a.length)}, Weight:#{a.weight}\n"
97
+ i += 1
98
+ end
99
+
100
+ #example4: Process a text file, and write the result text to file
101
+ puts file_proc("./test.txt", "./test_result.txt", NULL)
102
+
103
+
104
+ #example5: Get ProcessAWordCount, it returns the count of the words
105
+ puts count = text_wordcount(s)
106
+
107
+
108
+
109
+ #example6: Add/Delete a word to the user dictionary (the user dictionary is stored at ./data/userdict.dpat)
110
+ puts text_proc("我们都是爱思客")
111
+ #add a user word
112
+ add_userword("都是爱思客 n")
113
+ add_userword("思客 n")
114
+ add_userword("你是 n")
115
+ add_userword("都是客 n")
116
+ add_userword("都是爱 n")
117
+ puts text_proc("我们都是爱思客")
118
+ #save the user word to disk
119
+ save_userdict()
120
+ puts text_proc("我们都是爱思客")
121
+ #delete a user word
122
+ del_userword("都是爱思客")
123
+ save_userdict()
124
+ puts text_proc("我们都是爱思客")
125
+
126
+
127
+ #example7: Import user-defined dictionary from a text file. and puts NLPIR result
128
+ puts text_proc("1989年春夏之交的政治风波1989年政治风波24小时降雪量24小时降雨量863计划ABC防护训练APEC会议BB机BP机C2系统C3I系统C3系统C4ISR系统C4I系统CCITT建议")
129
+ puts import_userdict("./userdict.txt")
130
+ #you can see the example file: ./userdict.txt to know the userdict`s format requirements
131
+ save_userdict()
132
+ puts text_proc("1989年春夏之交的政治风波1989年政治风波24小时降雪量24小时降雨量863计划ABC防护训练APEC会议BB机BP机C2系统C3I系统C3系统C4ISR系统C4I系统CCITT建议")
133
+
134
+
135
+ #example8: Get keywords of text
136
+ #2nd parameter is the MaxNumber of keywords
137
+ #3rd parameter is a switch to show the WeightOut or not
138
+ puts text_keywords(s, 50,NLPIR_TRUE)
139
+
140
+
141
+ #example9: Get keywords from file
142
+ puts file_keywords("./test.txt",50, NLPIR_TRUE)
143
+
144
+
145
+ #example10: Find new words from text
146
+ puts text_newwords(s, 50, NLPIR_TRUE)
147
+
148
+
149
+ #example11: Find new words from file
150
+ puts file_newwords("./test.txt")
151
+
152
+
153
+ #example12: Extract a finger print from the paragraph
154
+ puts text_fingerprint(s)
155
+
156
+
157
+ #example13: select which pos map will use
158
+ #ICT_POS_MAP_FIRST #//计算所一级标注集
159
+ #ICT_POS_MAP_SECOND #//计算所二级标注集
160
+ #PKU_POS_MAP_SECOND #//北大二级标注集
161
+ #PKU_POS_MAP_FIRST #//北大一级标注集
162
+ setPOSmap(ICT_POS_MAP_FIRST)
163
+ puts text_proc(s)
164
+ setPOSmap(PKU_POS_MAP_FIRST)
165
+ puts text_proc(s)
166
+
167
+
168
+
169
+ # 新词发现批量处理功能
170
+ #以下函数为2013版本专门针对新词发现的过程,一般建议脱机实现,不宜在线处理
171
+ # 新词识别完成后,再自动导入到分词系统中,即可完成
172
+
173
+ NWI_start() #启动新词发现功能
174
+ f=File.new("test.txt", "r")
175
+ text=f.read
176
+ NWI_addfile(text)#添加新词训练的文件,可反复添加
177
+ NWI_complete()#添加文件或者训练内容结束
178
+ f.close()
179
+ puts NWI_result()#输出新词识别结果
180
+ #puts file_proc("a.txt","b.txt")
181
+ NWI_result2userdict()#新词识别结果导入到用户词典
182
+
183
+
184
+ #at the end call NLPIR_Exit() to free system resources
185
+ nlpir_exit()
186
+
187
+
188
+ ```
189
+ ## c-style func
50
190
 
51
191
  ```ruby
52
192
 
@@ -171,12 +311,9 @@ also can see some examples from test cases [here](https://github.com/JoeWoo/nlpi
171
311
  # 新词识别完成后,再自动导入到分词系统中,即可完成
172
312
 
173
313
  NLPIR_NWI_Start() #启动新词发现功能
174
- f=File.new("test.txt", "r")
175
- text=f.read
176
- NLPIR_NWI_AddFile(text)#添加新词训练的文件,可反复添加
314
+ NLPIR_NWI_AddFile("./text.txt")#添加新词训练的文件,可反复添加
177
315
  NLPIR_NWI_Complete()#添加文件或者训练内容结束
178
- f.close()
179
- puts NLPIR_NWI_GetResult()#输出新词识别结果
316
+ puts NLPIR_NWI_GetResult().to_s#输出新词识别结果 可传入一个参数NLPIR_TRUE或NLPIR_FALSE,用于是否输出词性
180
317
  #puts NLPIR_FileProcess("a.txt","b.txt")
181
318
  NLPIR_NWI_Result2UserDict()#新词识别结果导入到用户词典
182
319
 
@@ -186,6 +323,7 @@ also can see some examples from test cases [here](https://github.com/JoeWoo/nlpi
186
323
 
187
324
  ```
188
325
 
326
+
189
327
  ## Contributing
190
328
 
191
329
  1. Fork it
Binary file
@@ -1,15 +1,16 @@
1
- <?xmlversion="1.0"encoding="GB2312"?>
2
- <NLPIR>
3
- <TagSet>ICTPOS.map</TagSet>//���Ա�ע��ӳ���ļ�
4
- <UserDict>on</UserDict>//On��UserDictionaryapplied;Off:notapplied��
5
- <UserDictPrior>On</UserDictPrior>//�û��ʵ�����,Addedin2006-03-16,requiredbyNECOn���û��ʵ�ͺ��Ĵʵ���ͬʱ�еĴʻ㣬�û��ʵ����ȣ������ܲ�Ҫ���ã���������Ĵʵ��еĴʶ�����Ϊ�û��ʵ䣬��Ч���ʵ��䷴
6
- <FieldDict>off</FieldDict>//On��FieldDictionaryapplied;Off:notapplied��
7
- <GranularityContorl>off</GranularityContorl>
8
- <Log>On</Log>//On,Off�����磺Off:�ر���־���ܣ�On:����־����
9
- <version>2013</version>//ϵͳ�汾��
10
- <Modify>2012-11-14</Modify>//ϵͳ����޶�ʱ��
11
- <Lexicon>2012-11-14</Lexicon>//�ʵ�����޶�ʱ��
12
- <adaptive>true</adaptive>//����Ӧ�ִʣ�Ĭ��Ϊfalse������Ӧ�ִʵ�Ч�ʻ�ϵ�
13
- <author>�Ż�ƽ��ʿ</author>//����
14
- <Contact>pipy_zhang@msn.com</Contact>//������ϵ��ʽ
15
- </NLPIR>
1
+ <?xmlversion="1.0"encoding="GB2312"?>
2
+ <NLPIR>
3
+ <TagSet>ICTPOS.map</TagSet>//���Ա�ע��ӳ���ļ�
4
+ <UserDict>on</UserDict>//On��UserDictionaryapplied;Off:notapplied��
5
+ <UserDictPrior>Off</UserDictPrior>//�û��ʵ�����,Addedin2006-03-16,requiredbyNECOn���û��ʵ�ͺ��Ĵʵ���ͬʱ�еĴʻ㣬�û��ʵ����ȣ������ܲ�Ҫ���ã���������Ĵʵ��еĴʶ�����Ϊ�û��ʵ䣬��Ч���ʵ��䷴
6
+ <FieldDict>on</FieldDict>//On��FieldDictionaryapplied;Off:notapplied��
7
+ <GranularityContorl>off</GranularityContorl>
8
+ <Log>Off</Log>//On,Off�����磺Off:�ر���־���ܣ�On:����־����
9
+ <version>2013</version>//ϵͳ�汾��
10
+ <Modify>2012-11-14</Modify>//ϵͳ����޶�ʱ��
11
+ <Lexicon>2012-11-14</Lexicon>//�ʵ�����޶�ʱ��
12
+ <Sentiment>On</Sentiment>//On,Off�����磺Off:�ر���з������ܣ�On:����з�������
13
+ <adaptive>true</adaptive>//����Ӧ�ִʣ�Ĭ��Ϊfalse������Ӧ�ִʵ�Ч�ʻ�ϵ�
14
+ <author>�Ż�ƽ��ʿ</author>//����
15
+ <Contact>pipy_zhang@msn.com</Contact>//������ϵ��ʽ
16
+ </NLPIR>
@@ -0,0 +1,2 @@
1
+ ���ť��ݸ޴ܸ���΢�����㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע����΢��@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע��΢�����㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�ábQBw6u|V
2
+ -rt�����������I+g���蟋�����'"&t#''������̣ռ�܃�����רע��΢�����㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע����΢��@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע��΢�����㡢��Ȼ���Դ���������˛��̷���ޜ��̍�Ӟ�����������ā���و����ҁ��䂜�񖔁˃�������ᔆ���Oww.nlpir.org.��ע����΢��@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע��΢�����㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��עЮ������. /=-++<���ɩв�ʿ�Ż�ƽ��ʿרע��΢�����㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע����΢��@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע��΢�����㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע����΢��@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע��΢�����㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע����΢��@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿ�����΢�����㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע����΢��@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע��΢�����㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����Fww.nlpir.org.��ע����΢��@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע��΢�����㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע����΢��@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע��΢�����㡢��Ȼ���Դ�������Ϣ��������ϓ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע����΢��@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע��΢�����㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע����΢��@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע��΢�����㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע����΢��@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע��΢�����㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע����΢��@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע��΢�����㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע����΢��@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע��΢�����㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע����΢��@ICTCLAS�Ż�ƽ�
�ʿ�Ż�ƽ��ʿרע��΢�����㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע����΢��@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע��΢�����㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע����΢��@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע��΢�����㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ����������������順�ӧ����������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע����΢��@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע��΢�����㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע����΢��@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע��΢�����㡢��Ȼ���Դ���
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
@@ -94,3 +94,7 @@ wyz
94
94
  x
95
95
  y
96
96
  z
97
+ xu
98
+ xe
99
+ xs
100
+ xm
Binary file
@@ -1,25 +1,28 @@
1
+
1
2
  ��˿ ���� �⹹ ��Q �������Ļ� ���� Ⱥ���Գ� ���ݱ�΢
3
+ ����ɽ �ΰ������� ���������� �������� ��Ʒ���� ��������
4
+ �й���ɫ������� �����������׶� ��ѧ��չ�� ��̬���� ��ṫƽ���� �������� �л�����ΰ���� ���幦���� �����ռ俪�� ��������� ���緢չһ�廯 С����� �������� ��������Ļ�ǿ�� ��ҵ��λ ���� ����ѡ�� �������� ���� ��������г��������� ��������� �������� ȫ������ ��Ҫս�Ի��� ȡ���ش��չ ��̬ϵͳ �Ȳ��ƽ� ���� �ӿ�ת�� ���ʽ���Σ�� ���¶���׼�� ������������ ��������Ƚ��Ļ� ũҵ�ۺ��������� ʳƷҩƷ��ȫ ��۵�����ϵ ����ͬԸ�� �����͹��� ˾������ �������ϱ��� ȫ���л���Ů ս�������˲�ҵ ����Ϸ�Ȩ�� ȫ������ͬ ������������ ��ǰ����ƽ�� �Ļ���ʵ�� ��֤���񵱼����� ��������ϵ ȫ��ҽ�� ����ֿ� ����������չ �����Ч �����ƶȸĸ� �������� ����δ�� ʵ�徭�� ��ѧ��ˮƽ ���㵳�� ���Ȼ� ȫ�潨��С����� �������� ��ʳ��ȫ ����ʵ�� ����ɲ� �������� �������� ���ر��� ����ڵ� ��ѧ�ش� �������� ���������� ��������� ��ȫ��в ��������� �����Ļ� ������� ����Ӱ���� �Ծ����� �Ͷ����� �ִ�����ҵ �������� ���˹�ͬ�� ��ʵ�ƽ� �Ҹ����� ���ļ�ֵ�� ���η��� ����������� ������ ƽ�Ȼ��� ������ҡ��� ����� ��Ҫս�Ի����� ת�侭�÷�չ��ʽ �ӿ�ת�侭�÷�չ��ʽ ���������ļ�ֵ��ϵ ����ȡ���ش��չ ʵ���л�����ΰ���� �����ں�ʽ��չ ��������������Ȼ� �ĸ↑�� ά����ṫƽ����
2
5
  ��˿ ���� �⹹ ��Q �������Ļ� ���� Ⱥ���Գ� ���ݱ�΢
6
+ ��˿ ���� �⹹ ��˿�Ļ� ��Q �������Ļ� Ⱥ���Գ� ���ݱ�΢ ��˧
7
+ ��˿#����#�⹹#��˿�Ļ�#��Q#�������Ļ�#Ⱥ���Գ�#���ݱ�΢#��˧#
3
8
 
4
9
 
5
10
 
6
11
 
7
12
 
8
13
 
14
+ ������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
15
+ ������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
9
16
 
10
17
 
11
-
12
-
13
-
14
-
15
-
16
-
17
-
18
-
19
-
20
-
21
-
22
-
23
-
24
-
25
-
18
+ ��˿#����#�⹹#��˿�Ļ�#��Q#�������Ļ�#Ⱥ���Գ�#���ݱ�΢#
19
+ ������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
20
+ ��˿#����#�⹹#��˿�Ļ�#��Q#�������Ļ�#Ⱥ���Գ�#���ݱ�΢#
21
+ ������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
22
+ ��˿#����#�⹹#��˿�Ļ�#��Q#�������Ļ�#Ⱥ���Գ�#���ݱ�΢#
23
+ ������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
24
+ ������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
25
+ ������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
26
+ ������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
27
+ ��˿#����#�⹹#��˿�Ļ�#��Q#�������Ļ�#Ⱥ���Գ�#���ݱ�΢#
28
+ ��˿#����#�⹹#��˿�Ļ�#��Q#�������Ļ�#Ⱥ���Գ�#���ݱ�΢#
@@ -94,3 +94,7 @@ w
94
94
  x
95
95
  y
96
96
  z
97
+ xu
98
+ xe
99
+ xs
100
+ xm
@@ -94,3 +94,7 @@ w
94
94
  x
95
95
  y
96
96
  z
97
+ x
98
+ x
99
+ x
100
+ x
Binary file
Binary file
Binary file
Binary file
Binary file
@@ -3,7 +3,7 @@ require File.expand_path("../nlpir/version", __FILE__)
3
3
  require 'fiddle'
4
4
  require 'fiddle/struct'
5
5
  require 'fiddle/import'
6
- require 'fileutils'
6
+ require 'fileutils'
7
7
  include Fiddle::CParser
8
8
  include Fiddle::Importer
9
9
 
@@ -14,169 +14,170 @@ module Nlpir
14
14
  ICT_POS_MAP_FIRST = 1 #计算所一级标注集
15
15
  ICT_POS_MAP_SECOND = 0 #计算所二级标注集
16
16
  PKU_POS_MAP_SECOND = 2 #北大二级标注集
17
- PKU_POS_MAP_FIRST = 3 #北大一级标注集
17
+ PKU_POS_MAP_FIRST = 3 #北大一级标注集
18
18
  POS_SIZE = 40
19
19
 
20
20
  Result_t = struct ['int start','int length',"char sPOS[#{POS_SIZE}]",'int iPOS',
21
- 'int word_ID','int word_type','int weight']
22
-
21
+ 'int word_ID','int word_type','int weight']
22
+
23
23
  GBK_CODE = 0 #默认支持GBK编码
24
24
  UTF8_CODE = GBK_CODE + 1 #UTF8编码
25
25
  BIG5_CODE = GBK_CODE + 2 #BIG5编码
26
26
  GBK_FANTI_CODE = GBK_CODE + 3 #GBK编码,里面包含繁体字
27
27
 
28
+ @charset = 'utf-8'
28
29
 
29
30
  #提取链接库接口
30
31
  libm = Fiddle.dlopen(File.expand_path("../../bin/NLPIR.dll", __FILE__))
31
32
 
32
33
  NLPIR_Init_rb = Fiddle::Function.new(
33
- libm['?NLPIR_Init@@YA_NPBDH@Z'],
34
- [Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT],
35
- Fiddle::TYPE_INT
34
+ libm['NLPIR_Init'],
35
+ [Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT],
36
+ Fiddle::TYPE_INT
36
37
  )
37
- NLPIR_Exit_rb = Fiddle::Function.new(
38
- libm['?NLPIR_Exit@@YA_NXZ'],
39
- [],
40
- Fiddle::TYPE_INT
38
+ NLPIR_Exit_rb = Fiddle::Function.new(
39
+ libm['NLPIR_Exit'],
40
+ [],
41
+ Fiddle::TYPE_INT
41
42
  )
42
- NLPIR_ImportUserDict_rb = Fiddle::Function.new(
43
- libm['?NLPIR_ImportUserDict@@YAIPBD@Z'],
44
- [Fiddle::TYPE_VOIDP],
45
- Fiddle::TYPE_INT
43
+ NLPIR_ImportUserDict_rb = Fiddle::Function.new(
44
+ libm['NLPIR_ImportUserDict'],
45
+ [Fiddle::TYPE_VOIDP],
46
+ Fiddle::TYPE_INT
46
47
  )
47
- NLPIR_ParagraphProcess_rb = Fiddle::Function.new(
48
- libm['?NLPIR_ParagraphProcess@@YAPBDPBDH@Z'],
49
- [Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT],
50
- Fiddle::TYPE_VOIDP
48
+ NLPIR_ParagraphProcess_rb = Fiddle::Function.new(
49
+ libm['NLPIR_ParagraphProcess'],
50
+ [Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT],
51
+ Fiddle::TYPE_VOIDP
51
52
  )
52
- NLPIR_ParagraphProcessA_rb = Fiddle::Function.new(
53
- libm['?NLPIR_ParagraphProcessA@@YAPBUresult_t@@PBDPAH_N@Z'],
54
- [Fiddle::TYPE_VOIDP,Fiddle::TYPE_VOIDP],
55
- Fiddle::TYPE_VOIDP
53
+ NLPIR_ParagraphProcessA_rb = Fiddle::Function.new(
54
+ libm['NLPIR_ParagraphProcessA'],
55
+ [Fiddle::TYPE_VOIDP,Fiddle::TYPE_VOIDP],
56
+ Fiddle::TYPE_VOIDP
56
57
  )
57
- NLPIR_FileProcess_rb = Fiddle::Function.new(
58
- libm['?NLPIR_FileProcess@@YANPBD0H@Z'],
59
- [Fiddle::TYPE_VOIDP,Fiddle::TYPE_VOIDP, Fiddle::TYPE_INT],
60
- Fiddle::TYPE_DOUBLE
58
+ NLPIR_FileProcess_rb = Fiddle::Function.new(
59
+ libm['NLPIR_FileProcess'],
60
+ [Fiddle::TYPE_VOIDP,Fiddle::TYPE_VOIDP, Fiddle::TYPE_INT],
61
+ Fiddle::TYPE_DOUBLE
61
62
  )
62
- NLPIR_GetParagraphProcessAWordCount_rb = Fiddle::Function.new(
63
- libm['?NLPIR_GetParagraphProcessAWordCount@@YAHPBD@Z'],
64
- [Fiddle::TYPE_VOIDP],
65
- Fiddle::TYPE_INT
63
+ NLPIR_GetParagraphProcessAWordCount_rb = Fiddle::Function.new(
64
+ libm['NLPIR_GetParagraphProcessAWordCount'],
65
+ [Fiddle::TYPE_VOIDP],
66
+ Fiddle::TYPE_INT
66
67
  )
67
- NLPIR_ParagraphProcessAW_rb = Fiddle::Function.new(
68
- libm['?NLPIR_ParagraphProcessAW@@YAXHPAUresult_t@@@Z'],
69
- [Fiddle::TYPE_INT,Fiddle::TYPE_VOIDP],
70
- Fiddle::TYPE_INT
68
+ NLPIR_ParagraphProcessAW_rb = Fiddle::Function.new(
69
+ libm['NLPIR_ParagraphProcessAW'],
70
+ [Fiddle::TYPE_INT,Fiddle::TYPE_VOIDP],
71
+ Fiddle::TYPE_INT
71
72
  )
72
- NLPIR_AddUserWord_rb = Fiddle::Function.new(
73
- libm['?NLPIR_AddUserWord@@YAHPBD@Z'],
74
- [Fiddle::TYPE_VOIDP],
75
- Fiddle::TYPE_INT
73
+ NLPIR_AddUserWord_rb = Fiddle::Function.new(
74
+ libm['NLPIR_AddUserWord'],
75
+ [Fiddle::TYPE_VOIDP],
76
+ Fiddle::TYPE_INT
76
77
  )
77
- NLPIR_SaveTheUsrDic_rb = Fiddle::Function.new(
78
- libm['?NLPIR_SaveTheUsrDic@@YAHXZ'],
79
- [],
80
- Fiddle::TYPE_INT
78
+ NLPIR_SaveTheUsrDic_rb = Fiddle::Function.new(
79
+ libm['NLPIR_SaveTheUsrDic'],
80
+ [],
81
+ Fiddle::TYPE_INT
81
82
  )
82
- NLPIR_DelUsrWord_rb = Fiddle::Function.new(
83
- libm['?NLPIR_DelUsrWord@@YAHPBD@Z'],
84
- [Fiddle::TYPE_VOIDP],
85
- Fiddle::TYPE_INT
83
+ NLPIR_DelUsrWord_rb = Fiddle::Function.new(
84
+ libm['NLPIR_DelUsrWord'],
85
+ [Fiddle::TYPE_VOIDP],
86
+ Fiddle::TYPE_INT
86
87
  )
87
- NLPIR_GetKeyWords_rb = Fiddle::Function.new(
88
- libm['?NLPIR_GetKeyWords@@YAPBDPBDH_N@Z'],
88
+ NLPIR_GetKeyWords_rb = Fiddle::Function.new(
89
+ libm['NLPIR_GetKeyWords'],
89
90
  [Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT,Fiddle::TYPE_INT],
90
91
  Fiddle::TYPE_VOIDP
91
92
  )
92
- NLPIR_GetFileKeyWords_rb = Fiddle::Function.new(
93
- libm['?NLPIR_GetFileKeyWords@@YAPBDPBDH_N@Z'],
93
+ NLPIR_GetFileKeyWords_rb = Fiddle::Function.new(
94
+ libm['NLPIR_GetFileKeyWords'],
94
95
  [Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT,Fiddle::TYPE_INT],
95
96
  Fiddle::TYPE_VOIDP
96
97
  )
97
- NLPIR_GetNewWords_rb = Fiddle::Function.new(
98
- libm['?NLPIR_GetNewWords@@YAPBDPBDH_N@Z'],
98
+ NLPIR_GetNewWords_rb = Fiddle::Function.new(
99
+ libm['NLPIR_GetNewWords'],
99
100
  [Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT,Fiddle::TYPE_INT],
100
101
  Fiddle::TYPE_VOIDP
101
102
  )
102
- NLPIR_GetFileNewWords_rb = Fiddle::Function.new(
103
- libm['?NLPIR_GetFileNewWords@@YAPBDPBDH_N@Z'],
103
+ NLPIR_GetFileNewWords_rb = Fiddle::Function.new(
104
+ libm['NLPIR_GetFileNewWords'],
104
105
  [Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT,Fiddle::TYPE_INT],
105
106
  Fiddle::TYPE_VOIDP
106
107
  )
107
- NLPIR_FingerPrint_rb = Fiddle::Function.new(
108
- libm['?NLPIR_FingerPrint@@YAKPBD@Z'],
108
+ NLPIR_FingerPrint_rb = Fiddle::Function.new(
109
+ libm['NLPIR_FingerPrint'],
109
110
  [Fiddle::TYPE_VOIDP],
110
111
  Fiddle::TYPE_LONG
111
112
  )
112
- NLPIR_SetPOSmap_rb = Fiddle::Function.new(
113
- libm['?NLPIR_SetPOSmap@@YAHH@Z'],
114
- [Fiddle::TYPE_INT],
115
- Fiddle::TYPE_INT
113
+ NLPIR_SetPOSmap_rb = Fiddle::Function.new(
114
+ libm['NLPIR_SetPOSmap'],
115
+ [Fiddle::TYPE_INT],
116
+ Fiddle::TYPE_INT
116
117
  )
117
118
 
118
- NLPIR_NWI_Start_rb = Fiddle::Function.new(
119
- libm['?NLPIR_NWI_Start@@YA_NXZ'],
120
- [],
121
- Fiddle::TYPE_INT
119
+ NLPIR_NWI_Start_rb = Fiddle::Function.new(
120
+ libm['NLPIR_NWI_Start'],
121
+ [],
122
+ Fiddle::TYPE_INT
122
123
  )
123
- NLPIR_NWI_AddFile_rb = Fiddle::Function.new(
124
- libm['?NLPIR_NWI_AddFile@@YAHPBD@Z'],
125
- [Fiddle::TYPE_VOIDP],
126
- Fiddle::TYPE_INT
124
+ NLPIR_NWI_AddFile_rb = Fiddle::Function.new(
125
+ libm['NLPIR_NWI_AddFile'],
126
+ [Fiddle::TYPE_VOIDP],
127
+ Fiddle::TYPE_INT
127
128
  )
128
- NLPIR_NWI_AddMem_rb = Fiddle::Function.new(
129
- libm['?NLPIR_NWI_AddMem@@YA_NPBD@Z'],
130
- [Fiddle::TYPE_VOIDP],
131
- Fiddle::TYPE_INT
129
+ NLPIR_NWI_AddMem_rb = Fiddle::Function.new(
130
+ libm['NLPIR_NWI_AddMem'],
131
+ [Fiddle::TYPE_VOIDP],
132
+ Fiddle::TYPE_INT
132
133
  )
133
- NLPIR_NWI_Complete_rb = Fiddle::Function.new(
134
- libm['?NLPIR_NWI_Complete@@YA_NXZ'],
135
- [],
136
- Fiddle::TYPE_INT
134
+ NLPIR_NWI_Complete_rb = Fiddle::Function.new(
135
+ libm['NLPIR_NWI_Complete'],
136
+ [],
137
+ Fiddle::TYPE_INT
137
138
  )
138
- NLPIR_NWI_GetResult_rb = Fiddle::Function.new(
139
- libm['?NLPIR_NWI_GetResult@@YAPBD_N@Z'],
140
- [Fiddle::TYPE_INT],
141
- Fiddle::TYPE_VOIDP
139
+ NLPIR_NWI_GetResult_rb = Fiddle::Function.new(
140
+ libm['NLPIR_NWI_GetResult'],
141
+ [Fiddle::TYPE_INT],
142
+ Fiddle::TYPE_VOIDP
142
143
  )
143
144
  NLPIR_NWI_Result2UserDict_rb = Fiddle::Function.new(
144
- libm['?NLPIR_NWI_Result2UserDict@@YAIXZ'],
145
- [],
146
- Fiddle::TYPE_VOIDP
145
+ libm['NLPIR_NWI_Result2UserDict'],
146
+ [],
147
+ Fiddle::TYPE_VOIDP
147
148
  )
148
149
 
149
150
  #--函数
150
151
 
151
- def NLPIR_Init(sInitDirPath=nil , encoding=UTF8_CODE, filepath)
152
- filepath += "/Data/"
153
- if File.exist?(filepath)==false
154
- FileUtils.mkdir(filepath)
152
+ def NLPIR_Init(sInitDirPath=nil , encoding=UTF8_CODE)
153
+ sInitDirPath += "/Data/"
154
+ if File.exist?(sInitDirPath)==false
155
+ FileUtils.mkdir(sInitDirPath)
155
156
  filemother = File.expand_path("../Data/", __FILE__)
156
- list=Dir.entries(filemother)
157
- list.each_index do |x|
158
- t = filemother+"/"+list[x]
159
- FileUtils.cp(t,filepath) if !File.directory?(t)
160
- end
161
- end
162
-
163
-
164
- NLPIR_Init_rb.call(sInitDirPath,encoding)
165
-
157
+ FileUtils.copy_entry filemother,sInitDirPath
158
+ end
159
+ @charset = 'gbk' if encoding == GBK_CODE
160
+ @charset = 'utf-8' if encoding == UTF8_CODE
161
+ @charset = 'big5' if encoding == BIG5_CODE
162
+ @charset = 'gbk' if encoding == GBK_FANTI_CODE
163
+ NLPIR_Init_rb.call(nil,encoding)
166
164
  end
165
+ alias :nlpir_init :NLPIR_Init
167
166
 
168
167
  def NLPIR_Exit()
169
- i = NLPIR_Exit_rb.call()
170
- return NLPIR_TRUE if i > 0
168
+ NLPIR_Exit_rb.call()
171
169
  end
170
+ alias :nlpir_exit :NLPIR_Exit
172
171
 
173
172
  def NLPIR_ImportUserDict(sFilename)
174
173
  NLPIR_ImportUserDict_rb.call(sFilename)
175
174
  end
175
+ alias :import_userdict :NLPIR_ImportUserDict
176
176
 
177
177
  def NLPIR_ParagraphProcess(sParagraph, bPOStagged=NLPIR_TRUE)
178
- NLPIR_ParagraphProcess_rb.call(sParagraph, bPOStagged).to_s
178
+ NLPIR_ParagraphProcess_rb.call(sParagraph, bPOStagged).to_s.force_encoding(@charset)
179
179
  end
180
+ alias :text_proc :NLPIR_ParagraphProcess
180
181
 
181
182
  def NLPIR_ParagraphProcessA(sParagraph)
182
183
  resultCount = NLPIR_GetParagraphProcessAWordCount(sParagraph)
@@ -190,86 +191,109 @@ NLPIR_NWI_AddFile_rb = Fiddle::Function.new(
190
191
  end
191
192
  return words_list
192
193
  end
194
+ alias :text_procA :NLPIR_ParagraphProcessA
193
195
 
194
- def NLPIR_FileProcess(sSourceFilename, sResultFilename, bPOStagged=NLPIR_TRUE)
195
- NLPIR_FileProcess_rb.call(sSourceFilename, sResultFilename, bPOStagged)
196
- end
196
+ def NLPIR_GetParagraphProcessAWordCount(sParagraph)
197
+ NLPIR_GetParagraphProcessAWordCount_rb.call(sParagraph)
198
+ end
199
+ alias :text_wordcount :NLPIR_GetParagraphProcessAWordCount
197
200
 
198
- def NLPIR_GetParagraphProcessAWordCount(sParagraph)
199
- NLPIR_GetParagraphProcessAWordCount_rb.call(sParagraph)
200
- end
201
+ def NLPIR_FileProcess(sSourceFilename, sResultFilename, bPOStagged=NLPIR_TRUE)
202
+ NLPIR_FileProcess_rb.call(sSourceFilename, sResultFilename, bPOStagged)
203
+ end
204
+ alias :file_proc :NLPIR_FileProcess
201
205
 
202
- def NLPIR_ParagraphProcessAW(sParagraph)
203
- free = Fiddle::Function.new(Fiddle::RUBY_FREE, [TYPE_VOIDP], TYPE_VOID)
204
- resultCount = NLPIR_GetParagraphProcessAWordCount(sParagraph)
205
- pVecResult = Pointer.malloc(Result_t.size*resultCount,free)
206
- NLPIR_ParagraphProcessAW_rb.call(resultCount,pVecResult)
207
- words_list = []
208
- words_list << Result_t.new(pVecResult)
209
- for i in 1...resultCount do
210
- words_list << Result_t.new(pVecResult+=Result_t.size)
211
- end
212
- return words_list
206
+
207
+ def NLPIR_ParagraphProcessAW(sParagraph)
208
+ free = Fiddle::Function.new(Fiddle::RUBY_FREE, [TYPE_VOIDP], TYPE_VOID)
209
+ resultCount = NLPIR_GetParagraphProcessAWordCount(sParagraph)
210
+ pVecResult = Pointer.malloc(Result_t.size*resultCount,free)
211
+ NLPIR_ParagraphProcessAW_rb.call(resultCount,pVecResult)
212
+ words_list = []
213
+ words_list << Result_t.new(pVecResult)
214
+ for i in 1...resultCount do
215
+ words_list << Result_t.new(pVecResult+=Result_t.size)
213
216
  end
217
+ return words_list
218
+ end
219
+ alias :text_procAW :NLPIR_ParagraphProcessAW
214
220
 
215
- def NLPIR_AddUserWord(sWord)
216
- NLPIR_AddUserWord_rb.call(sWord)
217
- end
218
221
 
219
- def NLPIR_SaveTheUsrDic()
220
- NLPIR_SaveTheUsrDic_rb.call()
221
- end
222
+ def NLPIR_AddUserWord(sWord)
223
+ NLPIR_AddUserWord_rb.call(sWord)
224
+ end
225
+ alias :add_userword :NLPIR_AddUserWord
222
226
 
223
- def NLPIR_DelUsrWord(sWord)
224
- NLPIR_DelUsrWord_rb.call(sWord)
225
- end
227
+ def NLPIR_SaveTheUsrDic()
228
+ NLPIR_SaveTheUsrDic_rb.call()
229
+ end
230
+ alias :save_userdict :NLPIR_SaveTheUsrDic
226
231
 
227
- def NLPIR_GetKeyWords(sLine, nMaxKeyLimit=50, bWeightOut=NLPIR_FALSE)
228
- NLPIR_GetKeyWords_rb.call(sLine, nMaxKeyLimit, bWeightOut).to_s
229
- end
232
+ def NLPIR_DelUsrWord(sWord)
233
+ NLPIR_DelUsrWord_rb.call(sWord)
234
+ end
235
+ alias :del_userword :NLPIR_DelUsrWord
230
236
 
231
- def NLPIR_GetFileKeyWords(sTextFile, nMaxKeyLimit=50, bWeightOut=NLPIR_FALSE)
232
- NLPIR_GetFileKeyWords_rb.call(sTextFile, nMaxKeyLimit, bWeightOut).to_s
233
- end
237
+ def NLPIR_GetKeyWords(sLine, nMaxKeyLimit=50, bWeightOut=NLPIR_FALSE)
238
+ NLPIR_GetKeyWords_rb.call(sLine, nMaxKeyLimit, bWeightOut).to_s.force_encoding(@charset)
239
+ end
240
+ alias :text_keywords :NLPIR_GetKeyWords
234
241
 
235
- def NLPIR_GetNewWords(sLine, nMaxKeyLimit=50, bWeightOut=NLPIR_FALSE)
236
- NLPIR_GetNewWords_rb.call(sLine, nMaxKeyLimit, bWeightOut).to_s
237
- end
242
+ def NLPIR_GetFileKeyWords(sTextFile, nMaxKeyLimit=50, bWeightOut=NLPIR_FALSE)
243
+ line = NLPIR_GetFileKeyWords_rb.call(sTextFile, nMaxKeyLimit, bWeightOut).to_s
244
+ line.force_encoding('gbk')
245
+ line.encode!(@charset)
246
+ end
247
+ alias :file_keywords :NLPIR_GetFileKeyWords
238
248
 
239
- def NLPIR_GetFileNewWords(sTextFile, nMaxKeyLimit=50, bWeightOut=NLPIR_FALSE)
240
- NLPIR_GetFileNewWords_rb.call(sTextFile, nMaxKeyLimit, bWeightOut).to_s
241
- end
249
+ def NLPIR_GetNewWords(sLine, nMaxKeyLimit=50, bWeightOut=NLPIR_FALSE)
250
+ NLPIR_GetNewWords_rb.call(sLine, nMaxKeyLimit, bWeightOut).to_s.force_encoding(@charset)
251
+ end
252
+ alias :text_newwords :NLPIR_GetNewWords
242
253
 
243
- def NLPIR_FingerPrint(sLine)
244
- NLPIR_FingerPrint_rb.call(sLine)
245
- end
254
+ def NLPIR_GetFileNewWords(sTextFile, nMaxKeyLimit=50, bWeightOut=NLPIR_FALSE)
255
+ NLPIR_GetFileNewWords_rb.call(sTextFile, nMaxKeyLimit, bWeightOut).to_s.force_encoding(@charset)
256
+ end
257
+ alias :file_newwords :NLPIR_GetFileNewWords
246
258
 
247
- def NLPIR_SetPOSmap(nPOSmap)
248
- NLPIR_SetPOSmap_rb.call(nPOSmap)
249
- end
259
+ def NLPIR_FingerPrint(sLine)
260
+ NLPIR_FingerPrint_rb.call(sLine)
261
+ end
262
+ alias :text_fingerprint :NLPIR_FingerPrint
250
263
 
251
- def NLPIR_NWI_Start()
252
- NLPIR_NWI_Start_rb.call()
253
- end
264
+ def NLPIR_SetPOSmap(nPOSmap)
265
+ NLPIR_SetPOSmap_rb.call(nPOSmap)
266
+ end
267
+ alias :setPOSmap :NLPIR_SetPOSmap
254
268
 
255
- def NLPIR_NWI_AddFile(sFilename)
256
- NLPIR_NWI_AddFile_rb.call(sFilename)
257
- end
269
+ def NLPIR_NWI_Start()
270
+ NLPIR_NWI_Start_rb.call()
271
+ end
272
+ alias :NWI_start :NLPIR_NWI_Start
258
273
 
259
- def NLPIR_NWI_AddMem(sFilename)
260
- NLPIR_NWI_AddMem_rb.call(sFilename)
261
- end
274
+ def NLPIR_NWI_AddFile(sFilename)
275
+ NLPIR_NWI_AddFile_rb.call(sFilename)
276
+ end
277
+ alias :NWI_addfile :NLPIR_NWI_AddFile
262
278
 
263
- def NLPIR_NWI_Complete()
264
- NLPIR_NWI_Complete_rb.call()
265
- end
279
+ def NLPIR_NWI_AddMem(sFilename)
280
+ NLPIR_NWI_AddMem_rb.call(sFilename)
281
+ end
282
+ alias :NWI_addmem :NLPIR_NWI_AddMem
266
283
 
267
- def NLPIR_NWI_GetResult( bWeightOut = NLPIR_FALSE)
268
- NLPIR_NWI_GetResult_rb.call(bWeightOut)
269
- end
284
+ def NLPIR_NWI_Complete()
285
+ NLPIR_NWI_Complete_rb.call()
286
+ end
287
+ alias :NWI_complete :NLPIR_NWI_Complete
270
288
 
271
- def NLPIR_NWI_Result2UserDict()
272
- NLPIR_NWI_Result2UserDict_rb.call()
273
- end
289
+ def NLPIR_NWI_GetResult( bWeightOut = NLPIR_FALSE)
290
+ NLPIR_NWI_GetResult_rb.call(bWeightOut)
291
+ end
292
+ alias :NWI_result :NLPIR_NWI_GetResult
293
+
294
+ def NLPIR_NWI_Result2UserDict()
295
+ NLPIR_NWI_Result2UserDict_rb.call()
296
+ end
297
+ alias :NWI_result2userdict :NLPIR_NWI_Result2UserDict
274
298
 
275
299
  end