nlpir 0.0.4-x86-mingw32 → 1.0.0-x86-mingw32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +146 -8
  3. data/bin/NLPIR.dll +0 -0
  4. data/lib/Data/Configure.xml +16 -15
  5. data/lib/Data/DocExtractor.user +2 -0
  6. data/lib/Data/English/English.pdat +0 -0
  7. data/lib/Data/English/English.pos +0 -0
  8. data/lib/Data/English/English.ung +0 -0
  9. data/lib/Data/English/English.wordlist +0 -0
  10. data/lib/Data/English/Irrel2regular.map +0 -0
  11. data/lib/Data/English/ne.pdat +0 -0
  12. data/lib/Data/English/ne.pos +0 -0
  13. data/lib/Data/English/ne.wordlist +0 -0
  14. data/lib/Data/FTU8.pdat +0 -0
  15. data/lib/Data/FTU8.wordlist +0 -0
  16. data/lib/Data/FTU82GBK.map +0 -0
  17. data/lib/Data/FieldDict.pdat +0 -0
  18. data/lib/Data/FieldDict.pos +0 -0
  19. data/lib/Data/GBK2FTU8.map +0 -0
  20. data/lib/Data/ICTPOS.map +4 -0
  21. data/lib/Data/NLPIR.user +0 -0
  22. data/lib/Data/NewWord.lst +18 -15
  23. data/lib/Data/PKU.map +4 -0
  24. data/lib/Data/PKU_First.map +4 -0
  25. data/lib/Data/UserDict.pdat +0 -0
  26. data/lib/Data/location.map +0 -0
  27. data/lib/Data/location.pdat +0 -0
  28. data/lib/Data/location.wordlist +0 -0
  29. data/lib/Data/sentiment.pdat +0 -0
  30. data/lib/Data/sentiment.ung +0 -0
  31. data/lib/nlpir.rb +187 -163
  32. data/lib/nlpir/version.rb +1 -1
  33. data/test/Data/Configure.xml +16 -15
  34. data/test/Data/DocExtractor.user +2 -0
  35. data/test/Data/English/English.pdat +0 -0
  36. data/test/Data/English/English.pos +0 -0
  37. data/test/Data/English/English.ung +0 -0
  38. data/test/Data/English/English.wordlist +0 -0
  39. data/test/Data/English/Irrel2regular.map +0 -0
  40. data/test/Data/English/ne.pdat +0 -0
  41. data/test/Data/English/ne.pos +0 -0
  42. data/test/Data/English/ne.wordlist +0 -0
  43. data/test/Data/FTU8.pdat +0 -0
  44. data/test/Data/FTU8.wordlist +0 -0
  45. data/test/Data/FTU82GBK.map +0 -0
  46. data/test/Data/GBK2FTU8.map +0 -0
  47. data/test/Data/ICTPOS.map +4 -0
  48. data/test/Data/NLPIR.user +0 -0
  49. data/test/Data/NewWord.lst +18 -63
  50. data/test/Data/PKU.map +4 -0
  51. data/test/Data/PKU_First.map +4 -0
  52. data/test/Data/UserDict.pdat +0 -0
  53. data/test/Data/location.map +0 -0
  54. data/test/Data/location.pdat +0 -0
  55. data/test/Data/location.wordlist +0 -0
  56. data/test/Data/sentiment.pdat +0 -0
  57. data/test/Data/sentiment.ung +0 -0
  58. data/test/findnewword.txt +103 -0
  59. data/test/test_nlpir.rb +137 -140
  60. data/test/test_result.txt +52 -35
  61. data/test/userdict.txt +5 -5
  62. metadata +59 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 65987eefe0d616b08e0f6659c43cd8b79469dab1
4
- data.tar.gz: 9fe53a61bea4bd9a877665c6f4edc096c5f80365
3
+ metadata.gz: 01b964073b5822742a58c2b2c1e438ee599bf466
4
+ data.tar.gz: a0293eb0eed9928e6ca2b7d4abbbd0fa3572e46a
5
5
  SHA512:
6
- metadata.gz: b08f5d4d63371d12f7a73bf810ab97f8e2288d52e4a60913c29ba8797c55567d6c5a96b95a243079e0ef57cb8a15a5e7188a954f37c7588d6314bb558ecc1367
7
- data.tar.gz: b48b25cee53d3b7158acce346bd608eec8ce97628949d3740f3c133bee7efa86cfe53ba2fe1b0710e1d4c91472593aeef2f0f8aaa930e3c8b7bf8878e9953943
6
+ metadata.gz: d76198cc3d39291a3d14c1b79b60a975cefd37a8ffdf50e9afddd2ae0439f1710f990bc79eb9ad1d4e7a9498befd0921abf264c868715c4394db5e65bfa5c8c0
7
+ data.tar.gz: fd64bb7fd5f1d5baa4686edb1854631f062bc71dc64bfb4305a514c96e35c55c0c60ebce1184d3c3563d3b01f841e6cf8ff3b41bdbc173e2401460d87ab795bb
data/README.md CHANGED
@@ -1,8 +1,8 @@
1
1
  # Nlpir_win
2
2
 
3
- A rubygem wrapper of chinese segment tools ICTCLAS2013
3
+ A rubygem wrapper of chinese segment tools ICTCLAS2014
4
4
 
5
- Nlpir version 0.0.4 , gem nlpir-0.0.4-x86-mingw32 support '>=ruby2.0.0' on win7.
5
+ Nlpir version 0.0.4, gem nlpir-0.0.4-x86-mingw32, supports '~ ruby2.0.0' on win7. For *nix systems, use [nlpir](https://github.com/JoeWoo/nlpir) instead.
6
6
 
7
7
  ## Installation
8
8
 
@@ -45,8 +45,148 @@ some DEFINE you may use :
45
45
  ```
46
46
 
47
47
  after you gem install it:
48
+ ##ruby-style func
49
+ ```ruby
50
+ require 'nlpir'
51
+ include Nlpir
48
52
 
49
- also can see some examples from test cases [here](https://github.com/JoeWoo/nlpir_win/blob/master/test/test_nlpir.rb)
53
+ s = "坚定不移沿着中国特色社会主义道路前进 为全面建成小康社会而奋斗"
54
+ #first of all : Call the NLPIR API nlpir_init
55
+
56
+ nlpir_init(File.expand_path("../", __FILE__),UTF8_CODE)
57
+
58
+ #example1: Process a paragraph, and return the result text with POS or not
59
+ puts text_proc(s, NLPIR_TRUE)
60
+ puts text_proc(s, NLPIR_FALSE)
61
+
62
+ #example2: Process a paragraph, and return an array whose elements are POS-tagged words.
63
+ #tips: text_procA() returns the array; its memory is allocated by NLPIR and will be freed by nlpir_exit() (memory in server)
64
+
65
+ words_list = text_procA(s)
66
+ i=1
67
+ words_list.each do |a|
68
+ sWhichDic=""
69
+ case a.word_type
70
+ when 0
71
+ sWhichDic = "核心词典"
72
+ when 1
73
+ sWhichDic = "用户词典"
74
+ when 2
75
+ sWhichDic = "专业词典"
76
+ end
77
+ puts "No.#{i}:start:#{a.start}, length:#{a.length}, POS_ID:#{a.sPOS},word_ID:#{a.word_ID},word_type:#{a.word_type} , UserDefine:#{sWhichDic}, Word:#{s.byteslice(a.start,a.length)}, Weight:#{a.weight}\n"
78
+ i += 1
79
+ end
80
+
81
+ #example3: Process a paragraph, and return an array whose elements are POS-tagged words.
82
+ #tips: text_procAW() returns the array; its memory is allocated by ruby::fiddle and is collected by the GC (memory in agent)
83
+
84
+ words_list = text_procAW(s)
85
+ i=1
86
+ words_list.each do |a|
87
+ sWhichDic=""
88
+ case a.word_type
89
+ when 0
90
+ sWhichDic = "核心词典"
91
+ when 1
92
+ sWhichDic = "用户词典"
93
+ when 2
94
+ sWhichDic = "专业词典"
95
+ end
96
+ puts "No.#{i}:start:#{a.start}, length:#{a.length}, POS_ID:#{a.sPOS},word_ID:#{a.word_ID},word_type:#{a.word_type} , UserDefine:#{sWhichDic}, Word:#{s.byteslice(a.start,a.length)}, Weight:#{a.weight}\n"
97
+ i += 1
98
+ end
99
+
100
+ #example4: Process a text file, and write the result text to a file
101
+ puts file_proc("./test.txt", "./test_result.txt", NULL)
102
+
103
+
104
+ #example5: Get ProcessAWordCount, it returns the count of the words
105
+ puts count = text_wordcount(s)
106
+
107
+
108
+
109
+ #example6: Add/Delete a word to/from the user dictionary (the user dictionary is stored at ./data/userdict.dpat)
110
+ puts text_proc("我们都是爱思客")
111
+ #add a user word
112
+ add_userword("都是爱思客 n")
113
+ add_userword("思客 n")
114
+ add_userword("你是 n")
115
+ add_userword("都是客 n")
116
+ add_userword("都是爱 n")
117
+ puts text_proc("我们都是爱思客")
118
+ #save the user word to disk
119
+ save_userdict()
120
+ puts text_proc("我们都是爱思客")
121
+ #delete a user word
122
+ del_userword("都是爱思客")
123
+ save_userdict()
124
+ puts text_proc("我们都是爱思客")
125
+
126
+
127
+ #example7: Import a user-defined dictionary from a text file, and print the NLPIR result
128
+ puts text_proc("1989年春夏之交的政治风波1989年政治风波24小时降雪量24小时降雨量863计划ABC防护训练APEC会议BB机BP机C2系统C3I系统C3系统C4ISR系统C4I系统CCITT建议")
129
+ puts import_userdict("./userdict.txt")
130
+ #see the example file ./userdict.txt for the userdict's format requirements
131
+ save_userdict()
132
+ puts text_proc("1989年春夏之交的政治风波1989年政治风波24小时降雪量24小时降雨量863计划ABC防护训练APEC会议BB机BP机C2系统C3I系统C3系统C4ISR系统C4I系统CCITT建议")
133
+
134
+
135
+ #example8: Get keywords of text
136
+ #2nd parameter is the MaxNumber of keywords
137
+ #3rd parameter is a switch to show the WeightOut or not
138
+ puts text_keywords(s, 50,NLPIR_TRUE)
139
+
140
+
141
+ #example9: Get keywords from file
142
+ puts file_keywords("./test.txt",50, NLPIR_TRUE)
143
+
144
+
145
+ #example10: Find new words from text
146
+ puts text_newwords(s, 50, NLPIR_TRUE)
147
+
148
+
149
+ #example11: Find new words from file
150
+ puts file_newwords("./test.txt")
151
+
152
+
153
+ #example12: Extract a finger print from the paragraph
154
+ puts text_fingerprint(s)
155
+
156
+
157
+ #example13: select which pos map will use
158
+ #ICT_POS_MAP_FIRST #//计算所一级标注集
159
+ #ICT_POS_MAP_SECOND #//计算所二级标注集
160
+ #PKU_POS_MAP_SECOND #//北大二级标注集
161
+ #PKU_POS_MAP_FIRST #//北大一级标注集
162
+ setPOSmap(ICT_POS_MAP_FIRST)
163
+ puts text_proc(s)
164
+ setPOSmap(PKU_POS_MAP_FIRST)
165
+ puts text_proc(s)
166
+
167
+
168
+
169
+ # 新词发现批量处理功能
170
+ #以下函数为2013版本专门针对新词发现的过程,一般建议脱机实现,不宜在线处理
171
+ # 新词识别完成后,再自动导入到分词系统中,即可完成
172
+
173
+ NWI_start() #启动新词发现功能
174
+ f=File.new("test.txt", "r")
175
+ text=f.read
176
+ NWI_addfile(text)#添加新词训练的文件,可反复添加
177
+ NWI_complete()#添加文件或者训练内容结束
178
+ f.close()
179
+ puts NWI_result()#输出新词识别结果
180
+ #puts file_proc("a.txt","b.txt")
181
+ NWI_result2userdict()#新词识别结果导入到用户词典
182
+
183
+
184
+ #at the end, call nlpir_exit() to free system resources
185
+ nlpir_exit()
186
+
187
+
188
+ ```
189
+ ##c-style func
50
190
 
51
191
  ```ruby
52
192
 
@@ -171,12 +311,9 @@ also can see some examples from test cases [here](https://github.com/JoeWoo/nlpi
171
311
  # 新词识别完成后,再自动导入到分词系统中,即可完成
172
312
 
173
313
  NLPIR_NWI_Start() #启动新词发现功能
174
- f=File.new("test.txt", "r")
175
- text=f.read
176
- NLPIR_NWI_AddFile(text)#添加新词训练的文件,可反复添加
314
+ NLPIR_NWI_AddFile("./text.txt")#添加新词训练的文件,可反复添加
177
315
  NLPIR_NWI_Complete()#添加文件或者训练内容结束
178
- f.close()
179
- puts NLPIR_NWI_GetResult()#输出新词识别结果
316
+ puts NLPIR_NWI_GetResult().to_s#输出新词识别结果 可传入一个参数NLPIR_TRUE或NLPIR_FALSE,用于是否输出词性
180
317
  #puts NLPIR_FileProcess("a.txt","b.txt")
181
318
  NLPIR_NWI_Result2UserDict()#新词识别结果导入到用户词典
182
319
 
@@ -186,6 +323,7 @@ also can see some examples from test cases [here](https://github.com/JoeWoo/nlpi
186
323
 
187
324
  ```
188
325
 
326
+
189
327
  ## Contributing
190
328
 
191
329
  1. Fork it
Binary file
@@ -1,15 +1,16 @@
1
- <?xmlversion="1.0"encoding="GB2312"?>
2
- <NLPIR>
3
- <TagSet>ICTPOS.map</TagSet>//���Ա�ע��ӳ���ļ�
4
- <UserDict>on</UserDict>//On��UserDictionaryapplied;Off:notapplied��
5
- <UserDictPrior>On</UserDictPrior>//�û��ʵ�����,Addedin2006-03-16,requiredbyNECOn���û��ʵ�ͺ��Ĵʵ���ͬʱ�еĴʻ㣬�û��ʵ����ȣ������ܲ�Ҫ���ã���������Ĵʵ��еĴʶ�����Ϊ�û��ʵ䣬��Ч���ʵ��䷴
6
- <FieldDict>off</FieldDict>//On��FieldDictionaryapplied;Off:notapplied��
7
- <GranularityContorl>off</GranularityContorl>
8
- <Log>On</Log>//On,Off�����磺Off:�ر���־���ܣ�On:����־����
9
- <version>2013</version>//ϵͳ�汾��
10
- <Modify>2012-11-14</Modify>//ϵͳ����޶�ʱ��
11
- <Lexicon>2012-11-14</Lexicon>//�ʵ�����޶�ʱ��
12
- <adaptive>true</adaptive>//����Ӧ�ִʣ�Ĭ��Ϊfalse������Ӧ�ִʵ�Ч�ʻ�ϵ�
13
- <author>�Ż�ƽ��ʿ</author>//����
14
- <Contact>pipy_zhang@msn.com</Contact>//������ϵ��ʽ
15
- </NLPIR>
1
+ <?xmlversion="1.0"encoding="GB2312"?>
2
+ <NLPIR>
3
+ <TagSet>ICTPOS.map</TagSet>//���Ա�ע��ӳ���ļ�
4
+ <UserDict>on</UserDict>//On��UserDictionaryapplied;Off:notapplied��
5
+ <UserDictPrior>Off</UserDictPrior>//�û��ʵ�����,Addedin2006-03-16,requiredbyNECOn���û��ʵ�ͺ��Ĵʵ���ͬʱ�еĴʻ㣬�û��ʵ����ȣ������ܲ�Ҫ���ã���������Ĵʵ��еĴʶ�����Ϊ�û��ʵ䣬��Ч���ʵ��䷴
6
+ <FieldDict>on</FieldDict>//On��FieldDictionaryapplied;Off:notapplied��
7
+ <GranularityContorl>off</GranularityContorl>
8
+ <Log>Off</Log>//On,Off�����磺Off:�ر���־���ܣ�On:����־����
9
+ <version>2013</version>//ϵͳ�汾��
10
+ <Modify>2012-11-14</Modify>//ϵͳ����޶�ʱ��
11
+ <Lexicon>2012-11-14</Lexicon>//�ʵ�����޶�ʱ��
12
+ <Sentiment>On</Sentiment>//On,Off�����磺Off:�ر���з������ܣ�On:����з�������
13
+ <adaptive>true</adaptive>//����Ӧ�ִʣ�Ĭ��Ϊfalse������Ӧ�ִʵ�Ч�ʻ�ϵ�
14
+ <author>�Ż�ƽ��ʿ</author>//����
15
+ <Contact>pipy_zhang@msn.com</Contact>//������ϵ��ʽ
16
+ </NLPIR>
@@ -0,0 +1,2 @@
1
+ ���ť��ݸ޴ܸ���΢�����㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע����΢��@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע��΢�����㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�ábQBw6u|V
2
+ -rt�����������I+g���蟋�����'"&t#''������̣ռ�܃�����רע��΢�����㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע����΢��@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע��΢�����㡢��Ȼ���Դ���������˛��̷���ޜ��̍�Ӟ�����������ā���و����ҁ��䂜�񖔁˃�������ᔆ���Oww.nlpir.org.��ע����΢��@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע��΢�����㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��עЮ������. /=-++<���ɩв�ʿ�Ż�ƽ��ʿרע��΢�����㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע����΢��@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע��΢�����㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע����΢��@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע��΢�����㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע����΢��@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿ�����΢�����㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע����΢��@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע��΢�����㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����Fww.nlpir.org.��ע����΢��@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע��΢�����㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע����΢��@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע��΢�����㡢��Ȼ���Դ�������Ϣ��������ϓ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע����΢��@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע��΢�����㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע����΢��@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע��΢�����㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע����΢��@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע��΢�����㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע����΢��@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע��΢�����㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע����΢��@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע��΢�����㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע����΢��@ICTCLAS�Ż�ƽ�
�ʿ�Ż�ƽ��ʿרע��΢�����㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע����΢��@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע��΢�����㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע����΢��@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע��΢�����㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ����������������順�ӧ����������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע����΢��@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע��΢�����㡢��Ȼ���Դ�������Ϣ��������Ϣ��ȫ���������������ھ��ѧ���������ҵӦ�á�������Դ��ӭ����www.nlpir.org.��ע����΢��@ICTCLAS�Ż�ƽ��ʿ�Ż�ƽ��ʿרע��΢�����㡢��Ȼ���Դ���
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
@@ -94,3 +94,7 @@ wyz
94
94
  x
95
95
  y
96
96
  z
97
+ xu
98
+ xe
99
+ xs
100
+ xm
Binary file
@@ -1,25 +1,28 @@
1
+
1
2
  ��˿ ���� �⹹ ��Q �������Ļ� ���� Ⱥ���Գ� ���ݱ�΢
3
+ ����ɽ �ΰ������� ���������� �������� ��Ʒ���� ��������
4
+ �й���ɫ������� �����������׶� ��ѧ��չ�� ��̬���� ��ṫƽ���� �������� �л�����ΰ���� ���幦���� �����ռ俪�� ��������� ���緢չһ�廯 С����� �������� ��������Ļ�ǿ�� ��ҵ��λ ���� ����ѡ�� �������� ���� ��������г��������� ��������� �������� ȫ������ ��Ҫս�Ի��� ȡ���ش��չ ��̬ϵͳ �Ȳ��ƽ� ���� �ӿ�ת�� ���ʽ���Σ�� ���¶���׼�� ������������ ��������Ƚ��Ļ� ũҵ�ۺ��������� ʳƷҩƷ��ȫ ��۵�����ϵ ����ͬԸ�� �����͹��� ˾������ �������ϱ��� ȫ���л���Ů ս�������˲�ҵ ����Ϸ�Ȩ�� ȫ������ͬ ������������ ��ǰ����ƽ�� �Ļ���ʵ�� ��֤���񵱼����� ��������ϵ ȫ��ҽ�� ����ֿ� ����������չ �����Ч �����ƶȸĸ� �������� ����δ�� ʵ�徭�� ��ѧ��ˮƽ ���㵳�� ���Ȼ� ȫ�潨��С����� �������� ��ʳ��ȫ ����ʵ�� ����ɲ� �������� �������� ���ر��� ����ڵ� ��ѧ�ش� �������� ���������� ��������� ��ȫ��в ��������� �����Ļ� ������� ����Ӱ���� �Ծ����� �Ͷ����� �ִ�����ҵ �������� ���˹�ͬ�� ��ʵ�ƽ� �Ҹ����� ���ļ�ֵ�� ���η��� ����������� ������ ƽ�Ȼ��� ������ҡ��� ����� ��Ҫս�Ի����� ת�侭�÷�չ��ʽ �ӿ�ת�侭�÷�չ��ʽ ���������ļ�ֵ��ϵ ����ȡ���ش��չ ʵ���л�����ΰ���� �����ں�ʽ��չ ��������������Ȼ� �ĸ↑�� ά����ṫƽ����
2
5
  ��˿ ���� �⹹ ��Q �������Ļ� ���� Ⱥ���Գ� ���ݱ�΢
6
+ ��˿ ���� �⹹ ��˿�Ļ� ��Q �������Ļ� Ⱥ���Գ� ���ݱ�΢ ��˧
7
+ ��˿#����#�⹹#��˿�Ļ�#��Q#�������Ļ�#Ⱥ���Գ�#���ݱ�΢#��˧#
3
8
 
4
9
 
5
10
 
6
11
 
7
12
 
8
13
 
14
+ ������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
15
+ ������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
9
16
 
10
17
 
11
-
12
-
13
-
14
-
15
-
16
-
17
-
18
-
19
-
20
-
21
-
22
-
23
-
24
-
25
-
18
+ ��˿#����#�⹹#��˿�Ļ�#��Q#�������Ļ�#Ⱥ���Գ�#���ݱ�΢#
19
+ ������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
20
+ ��˿#����#�⹹#��˿�Ļ�#��Q#�������Ļ�#Ⱥ���Գ�#���ݱ�΢#
21
+ ������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
22
+ ��˿#����#�⹹#��˿�Ļ�#��Q#�������Ļ�#Ⱥ���Գ�#���ݱ�΢#
23
+ ������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
24
+ ������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
25
+ ������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
26
+ ������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
27
+ ��˿#����#�⹹#��˿�Ļ�#��Q#�������Ļ�#Ⱥ���Գ�#���ݱ�΢#
28
+ ��˿#����#�⹹#��˿�Ļ�#��Q#�������Ļ�#Ⱥ���Գ�#���ݱ�΢#
@@ -94,3 +94,7 @@ w
94
94
  x
95
95
  y
96
96
  z
97
+ xu
98
+ xe
99
+ xs
100
+ xm
@@ -94,3 +94,7 @@ w
94
94
  x
95
95
  y
96
96
  z
97
+ x
98
+ x
99
+ x
100
+ x
Binary file
Binary file
Binary file
Binary file
Binary file
@@ -3,7 +3,7 @@ require File.expand_path("../nlpir/version", __FILE__)
3
3
  require 'fiddle'
4
4
  require 'fiddle/struct'
5
5
  require 'fiddle/import'
6
- require 'fileutils'
6
+ require 'fileutils'
7
7
  include Fiddle::CParser
8
8
  include Fiddle::Importer
9
9
 
@@ -14,169 +14,170 @@ module Nlpir
14
14
  ICT_POS_MAP_FIRST = 1 #计算所一级标注集
15
15
  ICT_POS_MAP_SECOND = 0 #计算所二级标注集
16
16
  PKU_POS_MAP_SECOND = 2 #北大二级标注集
17
- PKU_POS_MAP_FIRST = 3 #北大一级标注集
17
+ PKU_POS_MAP_FIRST = 3 #北大一级标注集
18
18
  POS_SIZE = 40
19
19
 
20
20
  Result_t = struct ['int start','int length',"char sPOS[#{POS_SIZE}]",'int iPOS',
21
- 'int word_ID','int word_type','int weight']
22
-
21
+ 'int word_ID','int word_type','int weight']
22
+
23
23
  GBK_CODE = 0 #默认支持GBK编码
24
24
  UTF8_CODE = GBK_CODE + 1 #UTF8编码
25
25
  BIG5_CODE = GBK_CODE + 2 #BIG5编码
26
26
  GBK_FANTI_CODE = GBK_CODE + 3 #GBK编码,里面包含繁体字
27
27
 
28
+ @charset = 'utf-8'
28
29
 
29
30
  #提取链接库接口
30
31
  libm = Fiddle.dlopen(File.expand_path("../../bin/NLPIR.dll", __FILE__))
31
32
 
32
33
  NLPIR_Init_rb = Fiddle::Function.new(
33
- libm['?NLPIR_Init@@YA_NPBDH@Z'],
34
- [Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT],
35
- Fiddle::TYPE_INT
34
+ libm['NLPIR_Init'],
35
+ [Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT],
36
+ Fiddle::TYPE_INT
36
37
  )
37
- NLPIR_Exit_rb = Fiddle::Function.new(
38
- libm['?NLPIR_Exit@@YA_NXZ'],
39
- [],
40
- Fiddle::TYPE_INT
38
+ NLPIR_Exit_rb = Fiddle::Function.new(
39
+ libm['NLPIR_Exit'],
40
+ [],
41
+ Fiddle::TYPE_INT
41
42
  )
42
- NLPIR_ImportUserDict_rb = Fiddle::Function.new(
43
- libm['?NLPIR_ImportUserDict@@YAIPBD@Z'],
44
- [Fiddle::TYPE_VOIDP],
45
- Fiddle::TYPE_INT
43
+ NLPIR_ImportUserDict_rb = Fiddle::Function.new(
44
+ libm['NLPIR_ImportUserDict'],
45
+ [Fiddle::TYPE_VOIDP],
46
+ Fiddle::TYPE_INT
46
47
  )
47
- NLPIR_ParagraphProcess_rb = Fiddle::Function.new(
48
- libm['?NLPIR_ParagraphProcess@@YAPBDPBDH@Z'],
49
- [Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT],
50
- Fiddle::TYPE_VOIDP
48
+ NLPIR_ParagraphProcess_rb = Fiddle::Function.new(
49
+ libm['NLPIR_ParagraphProcess'],
50
+ [Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT],
51
+ Fiddle::TYPE_VOIDP
51
52
  )
52
- NLPIR_ParagraphProcessA_rb = Fiddle::Function.new(
53
- libm['?NLPIR_ParagraphProcessA@@YAPBUresult_t@@PBDPAH_N@Z'],
54
- [Fiddle::TYPE_VOIDP,Fiddle::TYPE_VOIDP],
55
- Fiddle::TYPE_VOIDP
53
+ NLPIR_ParagraphProcessA_rb = Fiddle::Function.new(
54
+ libm['NLPIR_ParagraphProcessA'],
55
+ [Fiddle::TYPE_VOIDP,Fiddle::TYPE_VOIDP],
56
+ Fiddle::TYPE_VOIDP
56
57
  )
57
- NLPIR_FileProcess_rb = Fiddle::Function.new(
58
- libm['?NLPIR_FileProcess@@YANPBD0H@Z'],
59
- [Fiddle::TYPE_VOIDP,Fiddle::TYPE_VOIDP, Fiddle::TYPE_INT],
60
- Fiddle::TYPE_DOUBLE
58
+ NLPIR_FileProcess_rb = Fiddle::Function.new(
59
+ libm['NLPIR_FileProcess'],
60
+ [Fiddle::TYPE_VOIDP,Fiddle::TYPE_VOIDP, Fiddle::TYPE_INT],
61
+ Fiddle::TYPE_DOUBLE
61
62
  )
62
- NLPIR_GetParagraphProcessAWordCount_rb = Fiddle::Function.new(
63
- libm['?NLPIR_GetParagraphProcessAWordCount@@YAHPBD@Z'],
64
- [Fiddle::TYPE_VOIDP],
65
- Fiddle::TYPE_INT
63
+ NLPIR_GetParagraphProcessAWordCount_rb = Fiddle::Function.new(
64
+ libm['NLPIR_GetParagraphProcessAWordCount'],
65
+ [Fiddle::TYPE_VOIDP],
66
+ Fiddle::TYPE_INT
66
67
  )
67
- NLPIR_ParagraphProcessAW_rb = Fiddle::Function.new(
68
- libm['?NLPIR_ParagraphProcessAW@@YAXHPAUresult_t@@@Z'],
69
- [Fiddle::TYPE_INT,Fiddle::TYPE_VOIDP],
70
- Fiddle::TYPE_INT
68
+ NLPIR_ParagraphProcessAW_rb = Fiddle::Function.new(
69
+ libm['NLPIR_ParagraphProcessAW'],
70
+ [Fiddle::TYPE_INT,Fiddle::TYPE_VOIDP],
71
+ Fiddle::TYPE_INT
71
72
  )
72
- NLPIR_AddUserWord_rb = Fiddle::Function.new(
73
- libm['?NLPIR_AddUserWord@@YAHPBD@Z'],
74
- [Fiddle::TYPE_VOIDP],
75
- Fiddle::TYPE_INT
73
+ NLPIR_AddUserWord_rb = Fiddle::Function.new(
74
+ libm['NLPIR_AddUserWord'],
75
+ [Fiddle::TYPE_VOIDP],
76
+ Fiddle::TYPE_INT
76
77
  )
77
- NLPIR_SaveTheUsrDic_rb = Fiddle::Function.new(
78
- libm['?NLPIR_SaveTheUsrDic@@YAHXZ'],
79
- [],
80
- Fiddle::TYPE_INT
78
+ NLPIR_SaveTheUsrDic_rb = Fiddle::Function.new(
79
+ libm['NLPIR_SaveTheUsrDic'],
80
+ [],
81
+ Fiddle::TYPE_INT
81
82
  )
82
- NLPIR_DelUsrWord_rb = Fiddle::Function.new(
83
- libm['?NLPIR_DelUsrWord@@YAHPBD@Z'],
84
- [Fiddle::TYPE_VOIDP],
85
- Fiddle::TYPE_INT
83
+ NLPIR_DelUsrWord_rb = Fiddle::Function.new(
84
+ libm['NLPIR_DelUsrWord'],
85
+ [Fiddle::TYPE_VOIDP],
86
+ Fiddle::TYPE_INT
86
87
  )
87
- NLPIR_GetKeyWords_rb = Fiddle::Function.new(
88
- libm['?NLPIR_GetKeyWords@@YAPBDPBDH_N@Z'],
88
+ NLPIR_GetKeyWords_rb = Fiddle::Function.new(
89
+ libm['NLPIR_GetKeyWords'],
89
90
  [Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT,Fiddle::TYPE_INT],
90
91
  Fiddle::TYPE_VOIDP
91
92
  )
92
- NLPIR_GetFileKeyWords_rb = Fiddle::Function.new(
93
- libm['?NLPIR_GetFileKeyWords@@YAPBDPBDH_N@Z'],
93
+ NLPIR_GetFileKeyWords_rb = Fiddle::Function.new(
94
+ libm['NLPIR_GetFileKeyWords'],
94
95
  [Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT,Fiddle::TYPE_INT],
95
96
  Fiddle::TYPE_VOIDP
96
97
  )
97
- NLPIR_GetNewWords_rb = Fiddle::Function.new(
98
- libm['?NLPIR_GetNewWords@@YAPBDPBDH_N@Z'],
98
+ NLPIR_GetNewWords_rb = Fiddle::Function.new(
99
+ libm['NLPIR_GetNewWords'],
99
100
  [Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT,Fiddle::TYPE_INT],
100
101
  Fiddle::TYPE_VOIDP
101
102
  )
102
- NLPIR_GetFileNewWords_rb = Fiddle::Function.new(
103
- libm['?NLPIR_GetFileNewWords@@YAPBDPBDH_N@Z'],
103
+ NLPIR_GetFileNewWords_rb = Fiddle::Function.new(
104
+ libm['NLPIR_GetFileNewWords'],
104
105
  [Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT,Fiddle::TYPE_INT],
105
106
  Fiddle::TYPE_VOIDP
106
107
  )
107
- NLPIR_FingerPrint_rb = Fiddle::Function.new(
108
- libm['?NLPIR_FingerPrint@@YAKPBD@Z'],
108
+ NLPIR_FingerPrint_rb = Fiddle::Function.new(
109
+ libm['NLPIR_FingerPrint'],
109
110
  [Fiddle::TYPE_VOIDP],
110
111
  Fiddle::TYPE_LONG
111
112
  )
112
- NLPIR_SetPOSmap_rb = Fiddle::Function.new(
113
- libm['?NLPIR_SetPOSmap@@YAHH@Z'],
114
- [Fiddle::TYPE_INT],
115
- Fiddle::TYPE_INT
113
+ NLPIR_SetPOSmap_rb = Fiddle::Function.new(
114
+ libm['NLPIR_SetPOSmap'],
115
+ [Fiddle::TYPE_INT],
116
+ Fiddle::TYPE_INT
116
117
  )
117
118
 
118
- NLPIR_NWI_Start_rb = Fiddle::Function.new(
119
- libm['?NLPIR_NWI_Start@@YA_NXZ'],
120
- [],
121
- Fiddle::TYPE_INT
119
+ NLPIR_NWI_Start_rb = Fiddle::Function.new(
120
+ libm['NLPIR_NWI_Start'],
121
+ [],
122
+ Fiddle::TYPE_INT
122
123
  )
123
- NLPIR_NWI_AddFile_rb = Fiddle::Function.new(
124
- libm['?NLPIR_NWI_AddFile@@YAHPBD@Z'],
125
- [Fiddle::TYPE_VOIDP],
126
- Fiddle::TYPE_INT
124
+ NLPIR_NWI_AddFile_rb = Fiddle::Function.new(
125
+ libm['NLPIR_NWI_AddFile'],
126
+ [Fiddle::TYPE_VOIDP],
127
+ Fiddle::TYPE_INT
127
128
  )
128
- NLPIR_NWI_AddMem_rb = Fiddle::Function.new(
129
- libm['?NLPIR_NWI_AddMem@@YA_NPBD@Z'],
130
- [Fiddle::TYPE_VOIDP],
131
- Fiddle::TYPE_INT
129
+ NLPIR_NWI_AddMem_rb = Fiddle::Function.new(
130
+ libm['NLPIR_NWI_AddMem'],
131
+ [Fiddle::TYPE_VOIDP],
132
+ Fiddle::TYPE_INT
132
133
  )
133
- NLPIR_NWI_Complete_rb = Fiddle::Function.new(
134
- libm['?NLPIR_NWI_Complete@@YA_NXZ'],
135
- [],
136
- Fiddle::TYPE_INT
134
+ NLPIR_NWI_Complete_rb = Fiddle::Function.new(
135
+ libm['NLPIR_NWI_Complete'],
136
+ [],
137
+ Fiddle::TYPE_INT
137
138
  )
138
- NLPIR_NWI_GetResult_rb = Fiddle::Function.new(
139
- libm['?NLPIR_NWI_GetResult@@YAPBD_N@Z'],
140
- [Fiddle::TYPE_INT],
141
- Fiddle::TYPE_VOIDP
139
+ NLPIR_NWI_GetResult_rb = Fiddle::Function.new(
140
+ libm['NLPIR_NWI_GetResult'],
141
+ [Fiddle::TYPE_INT],
142
+ Fiddle::TYPE_VOIDP
142
143
  )
143
144
  NLPIR_NWI_Result2UserDict_rb = Fiddle::Function.new(
144
- libm['?NLPIR_NWI_Result2UserDict@@YAIXZ'],
145
- [],
146
- Fiddle::TYPE_VOIDP
145
+ libm['NLPIR_NWI_Result2UserDict'],
146
+ [],
147
+ Fiddle::TYPE_VOIDP
147
148
  )
148
149
 
149
150
  #--函数
150
151
 
151
- def NLPIR_Init(sInitDirPath=nil , encoding=UTF8_CODE, filepath)
152
- filepath += "/Data/"
153
- if File.exist?(filepath)==false
154
- FileUtils.mkdir(filepath)
152
+ def NLPIR_Init(sInitDirPath=nil , encoding=UTF8_CODE)
153
+ sInitDirPath += "/Data/"
154
+ if File.exist?(sInitDirPath)==false
155
+ FileUtils.mkdir(sInitDirPath)
155
156
  filemother = File.expand_path("../Data/", __FILE__)
156
- list=Dir.entries(filemother)
157
- list.each_index do |x|
158
- t = filemother+"/"+list[x]
159
- FileUtils.cp(t,filepath) if !File.directory?(t)
160
- end
161
- end
162
-
163
-
164
- NLPIR_Init_rb.call(sInitDirPath,encoding)
165
-
157
+ FileUtils.copy_entry filemother,sInitDirPath
158
+ end
159
+ @charset = 'gbk' if encoding == GBK_CODE
160
+ @charset = 'utf-8' if encoding == UTF8_CODE
161
+ @charset = 'big5' if encoding == BIG5_CODE
162
+ @charset = 'gbk' if encoding == GBK_FANTI_CODE
163
+ NLPIR_Init_rb.call(nil,encoding)
166
164
  end
165
+ alias :nlpir_init :NLPIR_Init
167
166
 
168
167
  def NLPIR_Exit()
169
- i = NLPIR_Exit_rb.call()
170
- return NLPIR_TRUE if i > 0
168
+ NLPIR_Exit_rb.call()
171
169
  end
170
+ alias :nlpir_exit :NLPIR_Exit
172
171
 
173
172
  def NLPIR_ImportUserDict(sFilename)
174
173
  NLPIR_ImportUserDict_rb.call(sFilename)
175
174
  end
175
+ alias :import_userdict :NLPIR_ImportUserDict
176
176
 
177
177
  def NLPIR_ParagraphProcess(sParagraph, bPOStagged=NLPIR_TRUE)
178
- NLPIR_ParagraphProcess_rb.call(sParagraph, bPOStagged).to_s
178
+ NLPIR_ParagraphProcess_rb.call(sParagraph, bPOStagged).to_s.force_encoding(@charset)
179
179
  end
180
+ alias :text_proc :NLPIR_ParagraphProcess
180
181
 
181
182
  def NLPIR_ParagraphProcessA(sParagraph)
182
183
  resultCount = NLPIR_GetParagraphProcessAWordCount(sParagraph)
@@ -190,86 +191,109 @@ NLPIR_NWI_AddFile_rb = Fiddle::Function.new(
190
191
  end
191
192
  return words_list
192
193
  end
194
+ alias :text_procA :NLPIR_ParagraphProcessA
193
195
 
194
- def NLPIR_FileProcess(sSourceFilename, sResultFilename, bPOStagged=NLPIR_TRUE)
195
- NLPIR_FileProcess_rb.call(sSourceFilename, sResultFilename, bPOStagged)
196
- end
196
+ def NLPIR_GetParagraphProcessAWordCount(sParagraph)
197
+ NLPIR_GetParagraphProcessAWordCount_rb.call(sParagraph)
198
+ end
199
+ alias :text_wordcount :NLPIR_GetParagraphProcessAWordCount
197
200
 
198
- def NLPIR_GetParagraphProcessAWordCount(sParagraph)
199
- NLPIR_GetParagraphProcessAWordCount_rb.call(sParagraph)
200
- end
201
+ def NLPIR_FileProcess(sSourceFilename, sResultFilename, bPOStagged=NLPIR_TRUE)
202
+ NLPIR_FileProcess_rb.call(sSourceFilename, sResultFilename, bPOStagged)
203
+ end
204
+ alias :file_proc :NLPIR_FileProcess
201
205
 
202
- def NLPIR_ParagraphProcessAW(sParagraph)
203
- free = Fiddle::Function.new(Fiddle::RUBY_FREE, [TYPE_VOIDP], TYPE_VOID)
204
- resultCount = NLPIR_GetParagraphProcessAWordCount(sParagraph)
205
- pVecResult = Pointer.malloc(Result_t.size*resultCount,free)
206
- NLPIR_ParagraphProcessAW_rb.call(resultCount,pVecResult)
207
- words_list = []
208
- words_list << Result_t.new(pVecResult)
209
- for i in 1...resultCount do
210
- words_list << Result_t.new(pVecResult+=Result_t.size)
211
- end
212
- return words_list
206
+
207
+ def NLPIR_ParagraphProcessAW(sParagraph)
208
+ free = Fiddle::Function.new(Fiddle::RUBY_FREE, [TYPE_VOIDP], TYPE_VOID)
209
+ resultCount = NLPIR_GetParagraphProcessAWordCount(sParagraph)
210
+ pVecResult = Pointer.malloc(Result_t.size*resultCount,free)
211
+ NLPIR_ParagraphProcessAW_rb.call(resultCount,pVecResult)
212
+ words_list = []
213
+ words_list << Result_t.new(pVecResult)
214
+ for i in 1...resultCount do
215
+ words_list << Result_t.new(pVecResult+=Result_t.size)
213
216
  end
217
+ return words_list
218
+ end
219
+ alias :text_procAW :NLPIR_ParagraphProcessAW
214
220
 
215
- def NLPIR_AddUserWord(sWord)
216
- NLPIR_AddUserWord_rb.call(sWord)
217
- end
218
221
 
219
- def NLPIR_SaveTheUsrDic()
220
- NLPIR_SaveTheUsrDic_rb.call()
221
- end
222
+ def NLPIR_AddUserWord(sWord)
223
+ NLPIR_AddUserWord_rb.call(sWord)
224
+ end
225
+ alias :add_userword :NLPIR_AddUserWord
222
226
 
223
- def NLPIR_DelUsrWord(sWord)
224
- NLPIR_DelUsrWord_rb.call(sWord)
225
- end
227
+ def NLPIR_SaveTheUsrDic()
228
+ NLPIR_SaveTheUsrDic_rb.call()
229
+ end
230
+ alias :save_userdict :NLPIR_SaveTheUsrDic
226
231
 
227
- def NLPIR_GetKeyWords(sLine, nMaxKeyLimit=50, bWeightOut=NLPIR_FALSE)
228
- NLPIR_GetKeyWords_rb.call(sLine, nMaxKeyLimit, bWeightOut).to_s
229
- end
232
+ def NLPIR_DelUsrWord(sWord)
233
+ NLPIR_DelUsrWord_rb.call(sWord)
234
+ end
235
+ alias :del_userword :NLPIR_DelUsrWord
230
236
 
231
- def NLPIR_GetFileKeyWords(sTextFile, nMaxKeyLimit=50, bWeightOut=NLPIR_FALSE)
232
- NLPIR_GetFileKeyWords_rb.call(sTextFile, nMaxKeyLimit, bWeightOut).to_s
233
- end
237
+ def NLPIR_GetKeyWords(sLine, nMaxKeyLimit=50, bWeightOut=NLPIR_FALSE)
238
+ NLPIR_GetKeyWords_rb.call(sLine, nMaxKeyLimit, bWeightOut).to_s.force_encoding(@charset)
239
+ end
240
+ alias :text_keywords :NLPIR_GetKeyWords
234
241
 
235
- def NLPIR_GetNewWords(sLine, nMaxKeyLimit=50, bWeightOut=NLPIR_FALSE)
236
- NLPIR_GetNewWords_rb.call(sLine, nMaxKeyLimit, bWeightOut).to_s
237
- end
242
+ def NLPIR_GetFileKeyWords(sTextFile, nMaxKeyLimit=50, bWeightOut=NLPIR_FALSE)
243
+ line = NLPIR_GetFileKeyWords_rb.call(sTextFile, nMaxKeyLimit, bWeightOut).to_s
244
+ line.force_encoding('gbk')
245
+ line.encode!(@charset)
246
+ end
247
+ alias :file_keywords :NLPIR_GetFileKeyWords
238
248
 
239
- def NLPIR_GetFileNewWords(sTextFile, nMaxKeyLimit=50, bWeightOut=NLPIR_FALSE)
240
- NLPIR_GetFileNewWords_rb.call(sTextFile, nMaxKeyLimit, bWeightOut).to_s
241
- end
249
+ def NLPIR_GetNewWords(sLine, nMaxKeyLimit=50, bWeightOut=NLPIR_FALSE)
250
+ NLPIR_GetNewWords_rb.call(sLine, nMaxKeyLimit, bWeightOut).to_s.force_encoding(@charset)
251
+ end
252
+ alias :text_newwords :NLPIR_GetNewWords
242
253
 
243
- def NLPIR_FingerPrint(sLine)
244
- NLPIR_FingerPrint_rb.call(sLine)
245
- end
254
+ def NLPIR_GetFileNewWords(sTextFile, nMaxKeyLimit=50, bWeightOut=NLPIR_FALSE)
255
+ NLPIR_GetFileNewWords_rb.call(sTextFile, nMaxKeyLimit, bWeightOut).to_s.force_encoding(@charset)
256
+ end
257
+ alias :file_newwords :NLPIR_GetFileNewWords
246
258
 
247
- def NLPIR_SetPOSmap(nPOSmap)
248
- NLPIR_SetPOSmap_rb.call(nPOSmap)
249
- end
259
+ def NLPIR_FingerPrint(sLine)
260
+ NLPIR_FingerPrint_rb.call(sLine)
261
+ end
262
+ alias :text_fingerprint :NLPIR_FingerPrint
250
263
 
251
- def NLPIR_NWI_Start()
252
- NLPIR_NWI_Start_rb.call()
253
- end
264
+ def NLPIR_SetPOSmap(nPOSmap)
265
+ NLPIR_SetPOSmap_rb.call(nPOSmap)
266
+ end
267
+ alias :setPOSmap :NLPIR_SetPOSmap
254
268
 
255
- def NLPIR_NWI_AddFile(sFilename)
256
- NLPIR_NWI_AddFile_rb.call(sFilename)
257
- end
269
+ def NLPIR_NWI_Start()
270
+ NLPIR_NWI_Start_rb.call()
271
+ end
272
+ alias :NWI_start :NLPIR_NWI_Start
258
273
 
259
- def NLPIR_NWI_AddMem(sFilename)
260
- NLPIR_NWI_AddMem_rb.call(sFilename)
261
- end
274
+ def NLPIR_NWI_AddFile(sFilename)
275
+ NLPIR_NWI_AddFile_rb.call(sFilename)
276
+ end
277
+ alias :NWI_addfile :NLPIR_NWI_AddFile
262
278
 
263
- def NLPIR_NWI_Complete()
264
- NLPIR_NWI_Complete_rb.call()
265
- end
279
+ def NLPIR_NWI_AddMem(sFilename)
280
+ NLPIR_NWI_AddMem_rb.call(sFilename)
281
+ end
282
+ alias :NWI_addmem :NLPIR_NWI_AddMem
266
283
 
267
- def NLPIR_NWI_GetResult( bWeightOut = NLPIR_FALSE)
268
- NLPIR_NWI_GetResult_rb.call(bWeightOut)
269
- end
284
+ def NLPIR_NWI_Complete()
285
+ NLPIR_NWI_Complete_rb.call()
286
+ end
287
+ alias :NWI_complete :NLPIR_NWI_Complete
270
288
 
271
- def NLPIR_NWI_Result2UserDict()
272
- NLPIR_NWI_Result2UserDict_rb.call()
273
- end
289
+ def NLPIR_NWI_GetResult( bWeightOut = NLPIR_FALSE)
290
+ NLPIR_NWI_GetResult_rb.call(bWeightOut)
291
+ end
292
+ alias :NWI_result :NLPIR_NWI_GetResult
293
+
294
+ def NLPIR_NWI_Result2UserDict()
295
+ NLPIR_NWI_Result2UserDict_rb.call()
296
+ end
297
+ alias :NWI_result2userdict :NLPIR_NWI_Result2UserDict
274
298
 
275
299
  end