nlpir 0.1.0-x86-linux → 1.0.0-x86-linux

Sign up to get free protection for your applications and to get access to all the features.
Files changed (75) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +160 -23
  3. data/bin/lib.txt +1738 -0
  4. data/bin/libNLPIR.so +0 -0
  5. data/lib/Data/Configure.xml +4 -3
  6. data/{test/Data/NLPIR.user → lib/Data/DocExtractor.user} +0 -0
  7. data/lib/Data/English/English.pdat +0 -0
  8. data/lib/Data/English/English.pos +0 -0
  9. data/lib/Data/English/English.ung +0 -0
  10. data/lib/Data/English/English.wordlist +0 -0
  11. data/lib/Data/English/Irrel2regular.map +0 -0
  12. data/lib/Data/English/ne.pdat +0 -0
  13. data/lib/Data/English/ne.pos +0 -0
  14. data/lib/Data/English/ne.wordlist +0 -0
  15. data/lib/Data/FTU8.pdat +0 -0
  16. data/lib/Data/FTU8.wordlist +0 -0
  17. data/lib/Data/FTU82GBK.map +0 -0
  18. data/lib/Data/FieldDict.pdat +0 -0
  19. data/lib/Data/FieldDict.pos +0 -0
  20. data/lib/Data/GBK2FTU8.map +0 -0
  21. data/lib/Data/ICTPOS.map +4 -0
  22. data/lib/Data/NLPIR.user +0 -0
  23. data/lib/Data/NewWord.lst +26 -0
  24. data/lib/Data/PKU.map +4 -0
  25. data/lib/Data/PKU_First.map +4 -0
  26. data/lib/Data/UserDict.pdat +0 -0
  27. data/lib/Data/location.map +0 -0
  28. data/{test/Data/FieldDict.pdat → lib/Data/location.pdat} +0 -0
  29. data/lib/Data/location.wordlist +0 -0
  30. data/lib/Data/sentiment.pdat +0 -0
  31. data/lib/Data/sentiment.ung +0 -0
  32. data/lib/nlpir.rb +129 -104
  33. data/lib/nlpir/version.rb +1 -1
  34. data/nlpir.gemspec +1 -1
  35. data/test/findnewword.txt +103 -0
  36. data/test/test_nlpir.rb +137 -133
  37. data/test/test_result.txt +52 -35
  38. metadata +32 -90
  39. data/test/Data/BIG2GBK.map +0 -0
  40. data/test/Data/BIG5.pdat +0 -0
  41. data/test/Data/BIG5.wordlist +0 -0
  42. data/test/Data/BiWord.big +0 -0
  43. data/test/Data/Configure.xml +0 -15
  44. data/test/Data/CoreDict.pdat +0 -0
  45. data/test/Data/CoreDict.pos +0 -0
  46. data/test/Data/CoreDict.unig +0 -0
  47. data/test/Data/FieldDict.pos +0 -0
  48. data/test/Data/GBK.pdat +0 -0
  49. data/test/Data/GBK.wordlist +0 -0
  50. data/test/Data/GBK2BIG.map +0 -0
  51. data/test/Data/GBK2GBKC.map +0 -0
  52. data/test/Data/GBK2UTF.map +0 -0
  53. data/test/Data/GBKA.pdat +0 -0
  54. data/test/Data/GBKA.wordlist +0 -0
  55. data/test/Data/GBKA2UTF.map +0 -0
  56. data/test/Data/GBKC.pdat +0 -0
  57. data/test/Data/GBKC.wordlist +0 -0
  58. data/test/Data/GBKC2GBK.map +0 -0
  59. data/test/Data/GranDict.pdat +6 -3371
  60. data/test/Data/GranDict.pos +0 -0
  61. data/test/Data/ICTPOS.map +0 -96
  62. data/test/Data/NLPIR.ctx +0 -0
  63. data/test/Data/NLPIR_First.map +0 -96
  64. data/test/Data/NewWord.lst +0 -3
  65. data/test/Data/PKU.map +0 -96
  66. data/test/Data/PKU_First.map +0 -96
  67. data/test/Data/UTF2GBK.map +0 -0
  68. data/test/Data/UTF2GBKA.map +0 -0
  69. data/test/Data/UTF8.pdat +0 -0
  70. data/test/Data/UTF8.wordlist +0 -0
  71. data/test/Data/UserDict.pdat +0 -0
  72. data/test/Data/charset.type +0 -0
  73. data/test/Data/nr.ctx +0 -0
  74. data/test/Data/nr.fsa +0 -0
  75. data/test/Data/nr.role +0 -0
Binary file
@@ -2,13 +2,14 @@
2
2
  <NLPIR>
3
3
  <TagSet>ICTPOS.map</TagSet>//词性标注集映射文件
4
4
  <UserDict>on</UserDict>//On��UserDictionaryapplied;Off:notapplied��
5
- <UserDictPrior>On</UserDictPrior>//�û��ʵ�����,Addedin2006-03-16,requiredbyNECOn���û��ʵ�ͺ��Ĵʵ���ͬʱ�еĴʻ㣬�û��ʵ����ȣ������ܲ�Ҫ���ã���������Ĵʵ��еĴʶ�����Ϊ�û��ʵ䣬��Ч���ʵ��䷴
6
- <FieldDict>off</FieldDict>//On��FieldDictionaryapplied;Off:notapplied��
5
+ <UserDictPrior>Off</UserDictPrior>//�û��ʵ�����,Addedin2006-03-16,requiredbyNECOn���û��ʵ�ͺ��Ĵʵ���ͬʱ�еĴʻ㣬�û��ʵ����ȣ������ܲ�Ҫ���ã���������Ĵʵ��еĴʶ�����Ϊ�û��ʵ䣬��Ч���ʵ��䷴
6
+ <FieldDict>on</FieldDict>//On��FieldDictionaryapplied;Off:notapplied��
7
7
  <GranularityContorl>off</GranularityContorl>
8
- <Log>On</Log>//On,Off�����磺Off:�ر���־���ܣ�On:����־����
8
+ <Log>Off</Log>//On,Off�����磺Off:�ر���־���ܣ�On:����־����
9
9
  <version>2013</version>//系统版本号
10
10
  <Modify>2012-11-14</Modify>//系统最后修订时间
11
11
  <Lexicon>2012-11-14</Lexicon>//词典最后修订时间
12
+ <Sentiment>On</Sentiment>//On,Off�����磺Off:�ر���з������ܣ�On:����з�������
12
13
  <adaptive>true</adaptive>//����Ӧ�ִʣ�Ĭ��Ϊfalse������Ӧ�ִʵ�Ч�ʻ�ϵ�
13
14
  <author>张华平博士</author>//作者
14
15
  <Contact>pipy_zhang@msn.com</Contact>//作者联系方式
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
@@ -94,3 +94,7 @@ wyz
94
94
  x
95
95
  y
96
96
  z
97
+ xu
98
+ xe
99
+ xs
100
+ xm
Binary file
@@ -1,2 +1,28 @@
1
+
1
2
  ��˿ ���� �⹹ ��Q �������Ļ� ���� Ⱥ���Գ� ���ݱ�΢
3
+ ����ɽ �ΰ������� ���������� �������� ��Ʒ���� ��������
4
+ �й���ɫ������� �����������׶� ��ѧ��չ�� ��̬���� ��ṫƽ���� �������� �л�����ΰ���� ���幦���� �����ռ俪�� ��������� ���緢չһ�廯 С����� �������� ��������Ļ�ǿ�� ��ҵ��λ ���� ����ѡ�� �������� ���� ��������г��������� ��������� �������� ȫ������ ��Ҫս�Ի��� ȡ���ش��չ ��̬ϵͳ �Ȳ��ƽ� ���� �ӿ�ת�� ���ʽ���Σ�� ���¶���׼�� ������������ ��������Ƚ��Ļ� ũҵ�ۺ��������� ʳƷҩƷ��ȫ ��۵�����ϵ ����ͬԸ�� �����͹��� ˾������ �������ϱ��� ȫ���л���Ů ս�������˲�ҵ ����Ϸ�Ȩ�� ȫ������ͬ ������������ ��ǰ����ƽ�� �Ļ���ʵ�� ��֤���񵱼����� ��������ϵ ȫ��ҽ�� ����ֿ� ����������չ �����Ч �����ƶȸĸ� �������� ����δ�� ʵ�徭�� ��ѧ��ˮƽ ���㵳�� ���Ȼ� ȫ�潨��С����� �������� ��ʳ��ȫ ����ʵ�� ����ɲ� �������� �������� ���ر��� ����ڵ� ��ѧ�ش� �������� ���������� ��������� ��ȫ��в ��������� �����Ļ� ������� ����Ӱ���� �Ծ����� �Ͷ����� �ִ�����ҵ �������� ���˹�ͬ�� ��ʵ�ƽ� �Ҹ����� ���ļ�ֵ�� ���η��� ����������� ������ ƽ�Ȼ��� ������ҡ��� ����� ��Ҫս�Ի����� ת�侭�÷�չ��ʽ �ӿ�ת�侭�÷�չ��ʽ ���������ļ�ֵ��ϵ ����ȡ���ش��չ ʵ���л�����ΰ���� �����ں�ʽ��չ ��������������Ȼ� �ĸ↑�� ά����ṫƽ����
2
5
  ��˿ ���� �⹹ ��Q �������Ļ� ���� Ⱥ���Գ� ���ݱ�΢
6
+ ��˿ ���� �⹹ ��˿�Ļ� ��Q �������Ļ� Ⱥ���Գ� ���ݱ�΢ ��˧
7
+ ��˿#����#�⹹#��˿�Ļ�#��Q#�������Ļ�#Ⱥ���Գ�#���ݱ�΢#��˧#
8
+
9
+
10
+
11
+
12
+
13
+
14
+ ������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
15
+ ������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
16
+
17
+
18
+ ��˿#����#�⹹#��˿�Ļ�#��Q#�������Ļ�#Ⱥ���Գ�#���ݱ�΢#
19
+ ������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
20
+ ��˿#����#�⹹#��˿�Ļ�#��Q#�������Ļ�#Ⱥ���Գ�#���ݱ�΢#
21
+ ������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
22
+ ��˿#����#�⹹#��˿�Ļ�#��Q#�������Ļ�#Ⱥ���Գ�#���ݱ�΢#
23
+ ������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
24
+ ������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
25
+ ������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
26
+ ������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
27
+ ��˿#����#�⹹#��˿�Ļ�#��Q#�������Ļ�#Ⱥ���Գ�#���ݱ�΢#
28
+ ��˿#����#�⹹#��˿�Ļ�#��Q#�������Ļ�#Ⱥ���Գ�#���ݱ�΢#
@@ -94,3 +94,7 @@ w
94
94
  x
95
95
  y
96
96
  z
97
+ xu
98
+ xe
99
+ xs
100
+ xm
@@ -94,3 +94,7 @@ w
94
94
  x
95
95
  y
96
96
  z
97
+ x
98
+ x
99
+ x
100
+ x
Binary file
Binary file
Binary file
Binary file
@@ -3,7 +3,7 @@ require File.expand_path("../nlpir/version", __FILE__)
3
3
  require 'fiddle'
4
4
  require 'fiddle/struct'
5
5
  require 'fiddle/import'
6
- require 'fileutils'
6
+ require 'fileutils'
7
7
  include Fiddle::CParser
8
8
  include Fiddle::Importer
9
9
 
@@ -14,168 +14,170 @@ module Nlpir
14
14
  ICT_POS_MAP_FIRST = 1 #计算所一级标注集
15
15
  ICT_POS_MAP_SECOND = 0 #计算所二级标注集
16
16
  PKU_POS_MAP_SECOND = 2 #北大二级标注集
17
- PKU_POS_MAP_FIRST = 3 #北大一级标注集
17
+ PKU_POS_MAP_FIRST = 3 #北大一级标注集
18
18
  POS_SIZE = 40
19
19
 
20
20
  Result_t = struct ['int start','int length',"char sPOS[#{POS_SIZE}]",'int iPOS',
21
- 'int word_ID','int word_type','double weight']
22
-
21
+ 'int word_ID','int word_type','int weight']
22
+
23
23
  GBK_CODE = 0 #默认支持GBK编码
24
24
  UTF8_CODE = GBK_CODE + 1 #UTF8编码
25
25
  BIG5_CODE = GBK_CODE + 2 #BIG5编码
26
26
  GBK_FANTI_CODE = GBK_CODE + 3 #GBK编码,里面包含繁体字
27
27
 
28
+ @charset = 'utf-8'
28
29
 
29
30
  #提取链接库接口
30
31
  libm = Fiddle.dlopen(File.expand_path("../../bin/libNLPIR.so", __FILE__))
31
32
 
32
33
  NLPIR_Init_rb = Fiddle::Function.new(
33
- libm['_Z10NLPIR_InitPKci'],
34
+ libm['NLPIR_Init'],
34
35
  [Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT],
35
36
  Fiddle::TYPE_INT
36
37
  )
37
38
  NLPIR_Exit_rb = Fiddle::Function.new(
38
- libm['_Z10NLPIR_Exitv'],
39
+ libm['NLPIR_Exit'],
39
40
  [],
40
41
  Fiddle::TYPE_INT
41
42
  )
42
43
  NLPIR_ImportUserDict_rb = Fiddle::Function.new(
43
- libm['_Z20NLPIR_ImportUserDictPKc'],
44
+ libm['NLPIR_ImportUserDict'],
44
45
  [Fiddle::TYPE_VOIDP],
45
46
  Fiddle::TYPE_INT
46
47
  )
47
48
  NLPIR_ParagraphProcess_rb = Fiddle::Function.new(
48
- libm['_Z22NLPIR_ParagraphProcessPKci'],
49
+ libm['NLPIR_ParagraphProcess'],
49
50
  [Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT],
50
51
  Fiddle::TYPE_VOIDP
51
52
  )
52
53
  NLPIR_ParagraphProcessA_rb = Fiddle::Function.new(
53
- libm['_Z23NLPIR_ParagraphProcessAPKcPib'],
54
+ libm['NLPIR_ParagraphProcessA'],
54
55
  [Fiddle::TYPE_VOIDP,Fiddle::TYPE_VOIDP],
55
56
  Fiddle::TYPE_VOIDP
56
57
  )
57
58
  NLPIR_FileProcess_rb = Fiddle::Function.new(
58
- libm['_Z17NLPIR_FileProcessPKcS0_i'],
59
+ libm['NLPIR_FileProcess'],
59
60
  [Fiddle::TYPE_VOIDP,Fiddle::TYPE_VOIDP, Fiddle::TYPE_INT],
60
61
  Fiddle::TYPE_DOUBLE
61
62
  )
62
63
  NLPIR_GetParagraphProcessAWordCount_rb = Fiddle::Function.new(
63
- libm['_Z35NLPIR_GetParagraphProcessAWordCountPKc'],
64
+ libm['NLPIR_GetParagraphProcessAWordCount'],
64
65
  [Fiddle::TYPE_VOIDP],
65
66
  Fiddle::TYPE_INT
66
67
  )
67
68
  NLPIR_ParagraphProcessAW_rb = Fiddle::Function.new(
68
- libm['_Z24NLPIR_ParagraphProcessAWiP8result_t'],
69
+ libm['NLPIR_ParagraphProcessAW'],
69
70
  [Fiddle::TYPE_INT,Fiddle::TYPE_VOIDP],
70
71
  Fiddle::TYPE_INT
71
72
  )
72
73
  NLPIR_AddUserWord_rb = Fiddle::Function.new(
73
- libm['_Z17NLPIR_AddUserWordPKc'],
74
+ libm['NLPIR_AddUserWord'],
74
75
  [Fiddle::TYPE_VOIDP],
75
76
  Fiddle::TYPE_INT
76
77
  )
77
78
  NLPIR_SaveTheUsrDic_rb = Fiddle::Function.new(
78
- libm['_Z19NLPIR_SaveTheUsrDicv'],
79
+ libm['NLPIR_SaveTheUsrDic'],
79
80
  [],
80
81
  Fiddle::TYPE_INT
81
82
  )
82
83
  NLPIR_DelUsrWord_rb = Fiddle::Function.new(
83
- libm['_Z16NLPIR_DelUsrWordPKc'],
84
+ libm['NLPIR_DelUsrWord'],
84
85
  [Fiddle::TYPE_VOIDP],
85
86
  Fiddle::TYPE_INT
86
87
  )
87
88
  NLPIR_GetKeyWords_rb = Fiddle::Function.new(
88
- libm['_Z17NLPIR_GetKeyWordsPKcib'],
89
+ libm['NLPIR_GetKeyWords'],
89
90
  [Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT,Fiddle::TYPE_INT],
90
91
  Fiddle::TYPE_VOIDP
91
92
  )
92
93
  NLPIR_GetFileKeyWords_rb = Fiddle::Function.new(
93
- libm['_Z21NLPIR_GetFileKeyWordsPKcib'],
94
+ libm['NLPIR_GetFileKeyWords'],
94
95
  [Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT,Fiddle::TYPE_INT],
95
96
  Fiddle::TYPE_VOIDP
96
97
  )
97
98
  NLPIR_GetNewWords_rb = Fiddle::Function.new(
98
- libm['_Z17NLPIR_GetNewWordsPKcib'],
99
+ libm['NLPIR_GetNewWords'],
99
100
  [Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT,Fiddle::TYPE_INT],
100
101
  Fiddle::TYPE_VOIDP
101
102
  )
102
103
  NLPIR_GetFileNewWords_rb = Fiddle::Function.new(
103
- libm['_Z21NLPIR_GetFileNewWordsPKcib'],
104
+ libm['NLPIR_GetFileNewWords'],
104
105
  [Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT,Fiddle::TYPE_INT],
105
106
  Fiddle::TYPE_VOIDP
106
107
  )
107
108
  NLPIR_FingerPrint_rb = Fiddle::Function.new(
108
- libm['_Z17NLPIR_FingerPrintPKc'],
109
+ libm['NLPIR_FingerPrint'],
109
110
  [Fiddle::TYPE_VOIDP],
110
111
  Fiddle::TYPE_LONG
111
112
  )
112
113
  NLPIR_SetPOSmap_rb = Fiddle::Function.new(
113
- libm['_Z15NLPIR_SetPOSmapi'],
114
+ libm['NLPIR_SetPOSmap'],
114
115
  [Fiddle::TYPE_INT],
115
116
  Fiddle::TYPE_INT
116
117
  )
117
118
 
118
119
  NLPIR_NWI_Start_rb = Fiddle::Function.new(
119
- libm['_Z15NLPIR_NWI_Startv'],
120
+ libm['NLPIR_NWI_Start'],
120
121
  [],
121
122
  Fiddle::TYPE_INT
122
123
  )
123
124
  NLPIR_NWI_AddFile_rb = Fiddle::Function.new(
124
- libm['_Z17NLPIR_NWI_AddFilePKc'],
125
+ libm['NLPIR_NWI_AddFile'],
125
126
  [Fiddle::TYPE_VOIDP],
126
127
  Fiddle::TYPE_INT
127
128
  )
128
129
  NLPIR_NWI_AddMem_rb = Fiddle::Function.new(
129
- libm['_Z16NLPIR_NWI_AddMemPKc'],
130
+ libm['NLPIR_NWI_AddMem'],
130
131
  [Fiddle::TYPE_VOIDP],
131
132
  Fiddle::TYPE_INT
132
133
  )
133
134
  NLPIR_NWI_Complete_rb = Fiddle::Function.new(
134
- libm['_Z18NLPIR_NWI_Completev'],
135
+ libm['NLPIR_NWI_Complete'],
135
136
  [],
136
137
  Fiddle::TYPE_INT
137
138
  )
138
139
  NLPIR_NWI_GetResult_rb = Fiddle::Function.new(
139
- libm['_Z19NLPIR_NWI_GetResultb'],
140
+ libm['NLPIR_NWI_GetResult'],
140
141
  [Fiddle::TYPE_INT],
141
142
  Fiddle::TYPE_VOIDP
142
143
  )
143
144
  NLPIR_NWI_Result2UserDict_rb = Fiddle::Function.new(
144
- libm['_Z25NLPIR_NWI_Result2UserDictv'],
145
+ libm['NLPIR_NWI_Result2UserDict'],
145
146
  [],
146
147
  Fiddle::TYPE_VOIDP
147
148
  )
148
149
 
149
150
  #--函数
150
151
 
151
- def NLPIR_Init(sInitDirPath=nil , encoding=UTF8_CODE, filepath)
152
- filepath += "/Data/"
153
- if File.exist?(filepath)==false
154
- FileUtils.mkdir(filepath)
152
+ def NLPIR_Init(sInitDirPath=nil , encoding=UTF8_CODE)
153
+ sInitDirPath += "/Data/"
154
+ if File.exist?(sInitDirPath)==false
155
+ FileUtils.mkdir(sInitDirPath)
155
156
  filemother = File.expand_path("../Data/", __FILE__)
156
- list=Dir.entries(filemother)
157
- list.each_index do |x|
158
- t = filemother+"/"+list[x]
159
- FileUtils.cp(t,filepath) if !File.directory?(t)
160
- end
161
- end
162
-
163
-
164
- NLPIR_Init_rb.call(sInitDirPath,encoding)
165
-
157
+ FileUtils.copy_entry filemother,sInitDirPath
158
+ end
159
+ @charset = 'gbk' if encoding == GBK_CODE
160
+ @charset = 'utf-8' if encoding == UTF8_CODE
161
+ @charset = 'big5' if encoding == BIG5_CODE
162
+ @charset = 'gbk' if encoding == GBK_FANTI_CODE
163
+ NLPIR_Init_rb.call(nil,encoding)
166
164
  end
165
+ alias :nlpir_init :NLPIR_Init
167
166
 
168
167
  def NLPIR_Exit()
169
168
  NLPIR_Exit_rb.call()
170
169
  end
170
+ alias :nlpir_exit :NLPIR_Exit
171
171
 
172
172
  def NLPIR_ImportUserDict(sFilename)
173
173
  NLPIR_ImportUserDict_rb.call(sFilename)
174
174
  end
175
+ alias :import_userdict :NLPIR_ImportUserDict
175
176
 
176
177
  def NLPIR_ParagraphProcess(sParagraph, bPOStagged=NLPIR_TRUE)
177
- NLPIR_ParagraphProcess_rb.call(sParagraph, bPOStagged).to_s
178
+ NLPIR_ParagraphProcess_rb.call(sParagraph, bPOStagged).to_s.force_encoding(@charset)
178
179
  end
180
+ alias :text_proc :NLPIR_ParagraphProcess
179
181
 
180
182
  def NLPIR_ParagraphProcessA(sParagraph)
181
183
  resultCount = NLPIR_GetParagraphProcessAWordCount(sParagraph)
@@ -189,86 +191,109 @@ module Nlpir
189
191
  end
190
192
  return words_list
191
193
  end
194
+ alias :text_procA :NLPIR_ParagraphProcessA
192
195
 
193
- def NLPIR_FileProcess(sSourceFilename, sResultFilename, bPOStagged=NLPIR_TRUE)
194
- NLPIR_FileProcess_rb.call(sSourceFilename, sResultFilename, bPOStagged)
195
- end
196
+ def NLPIR_GetParagraphProcessAWordCount(sParagraph)
197
+ NLPIR_GetParagraphProcessAWordCount_rb.call(sParagraph)
198
+ end
199
+ alias :text_wordcount :NLPIR_GetParagraphProcessAWordCount
196
200
 
197
- def NLPIR_GetParagraphProcessAWordCount(sParagraph)
198
- NLPIR_GetParagraphProcessAWordCount_rb.call(sParagraph)
199
- end
201
+ def NLPIR_FileProcess(sSourceFilename, sResultFilename, bPOStagged=NLPIR_TRUE)
202
+ NLPIR_FileProcess_rb.call(sSourceFilename, sResultFilename, bPOStagged)
203
+ end
204
+ alias :file_proc :NLPIR_FileProcess
200
205
 
201
- def NLPIR_ParagraphProcessAW(sParagraph)
202
- free = Fiddle::Function.new(Fiddle::RUBY_FREE, [TYPE_VOIDP], TYPE_VOID)
203
- resultCount = NLPIR_GetParagraphProcessAWordCount(sParagraph)
204
- pVecResult = Pointer.malloc(Result_t.size*resultCount,free)
205
- NLPIR_ParagraphProcessAW_rb.call(resultCount,pVecResult)
206
- words_list = []
207
- words_list << Result_t.new(pVecResult)
208
- for i in 1...resultCount do
209
- words_list << Result_t.new(pVecResult+=Result_t.size)
210
- end
211
- return words_list
206
+
207
+ def NLPIR_ParagraphProcessAW(sParagraph)
208
+ free = Fiddle::Function.new(Fiddle::RUBY_FREE, [TYPE_VOIDP], TYPE_VOID)
209
+ resultCount = NLPIR_GetParagraphProcessAWordCount(sParagraph)
210
+ pVecResult = Pointer.malloc(Result_t.size*resultCount,free)
211
+ NLPIR_ParagraphProcessAW_rb.call(resultCount,pVecResult)
212
+ words_list = []
213
+ words_list << Result_t.new(pVecResult)
214
+ for i in 1...resultCount do
215
+ words_list << Result_t.new(pVecResult+=Result_t.size)
212
216
  end
217
+ return words_list
218
+ end
219
+ alias :text_procAW :NLPIR_ParagraphProcessAW
213
220
 
214
- def NLPIR_AddUserWord(sWord)
215
- NLPIR_AddUserWord_rb.call(sWord)
216
- end
217
221
 
218
- def NLPIR_SaveTheUsrDic()
219
- NLPIR_SaveTheUsrDic_rb.call()
220
- end
222
+ def NLPIR_AddUserWord(sWord)
223
+ NLPIR_AddUserWord_rb.call(sWord)
224
+ end
225
+ alias :add_userword :NLPIR_AddUserWord
221
226
 
222
- def NLPIR_DelUsrWord(sWord)
223
- NLPIR_DelUsrWord_rb.call(sWord)
224
- end
227
+ def NLPIR_SaveTheUsrDic()
228
+ NLPIR_SaveTheUsrDic_rb.call()
229
+ end
230
+ alias :save_userdict :NLPIR_SaveTheUsrDic
225
231
 
226
- def NLPIR_GetKeyWords(sLine, nMaxKeyLimit=50, bWeightOut=NLPIR_FALSE)
227
- NLPIR_GetKeyWords_rb.call(sLine, nMaxKeyLimit, bWeightOut).to_s
228
- end
232
+ def NLPIR_DelUsrWord(sWord)
233
+ NLPIR_DelUsrWord_rb.call(sWord)
234
+ end
235
+ alias :del_userword :NLPIR_DelUsrWord
229
236
 
230
- def NLPIR_GetFileKeyWords(sTextFile, nMaxKeyLimit=50, bWeightOut=NLPIR_FALSE)
231
- NLPIR_GetFileKeyWords_rb.call(sTextFile, nMaxKeyLimit, bWeightOut).to_s
232
- end
237
+ def NLPIR_GetKeyWords(sLine, nMaxKeyLimit=50, bWeightOut=NLPIR_FALSE)
238
+ NLPIR_GetKeyWords_rb.call(sLine, nMaxKeyLimit, bWeightOut).to_s.force_encoding(@charset)
239
+ end
240
+ alias :text_keywords :NLPIR_GetKeyWords
233
241
 
234
- def NLPIR_GetNewWords(sLine, nMaxKeyLimit=50, bWeightOut=NLPIR_FALSE)
235
- NLPIR_GetNewWords_rb.call(sLine, nMaxKeyLimit, bWeightOut).to_s
236
- end
242
+ def NLPIR_GetFileKeyWords(sTextFile, nMaxKeyLimit=50, bWeightOut=NLPIR_FALSE)
243
+ line = NLPIR_GetFileKeyWords_rb.call(sTextFile, nMaxKeyLimit, bWeightOut).to_s
244
+ line.force_encoding('gbk')
245
+ line.encode!(@charset)
246
+ end
247
+ alias :file_keywords :NLPIR_GetFileKeyWords
237
248
 
238
- def NLPIR_GetFileNewWords(sTextFile, nMaxKeyLimit=50, bWeightOut=NLPIR_FALSE)
239
- NLPIR_GetFileNewWords_rb.call(sTextFile, nMaxKeyLimit, bWeightOut).to_s
240
- end
249
+ def NLPIR_GetNewWords(sLine, nMaxKeyLimit=50, bWeightOut=NLPIR_FALSE)
250
+ NLPIR_GetNewWords_rb.call(sLine, nMaxKeyLimit, bWeightOut).to_s.force_encoding(@charset)
251
+ end
252
+ alias :text_newwords :NLPIR_GetNewWords
241
253
 
242
- def NLPIR_FingerPrint(sLine)
243
- NLPIR_FingerPrint_rb.call(sLine)
244
- end
254
+ def NLPIR_GetFileNewWords(sTextFile, nMaxKeyLimit=50, bWeightOut=NLPIR_FALSE)
255
+ NLPIR_GetFileNewWords_rb.call(sTextFile, nMaxKeyLimit, bWeightOut).to_s.force_encoding(@charset)
256
+ end
257
+ alias :file_newwords :NLPIR_GetFileNewWords
245
258
 
246
- def NLPIR_SetPOSmap(nPOSmap)
247
- NLPIR_SetPOSmap_rb.call(nPOSmap)
248
- end
259
+ def NLPIR_FingerPrint(sLine)
260
+ NLPIR_FingerPrint_rb.call(sLine)
261
+ end
262
+ alias :text_fingerprint :NLPIR_FingerPrint
249
263
 
250
- def NLPIR_NWI_Start()
251
- NLPIR_NWI_Start_rb.call()
252
- end
264
+ def NLPIR_SetPOSmap(nPOSmap)
265
+ NLPIR_SetPOSmap_rb.call(nPOSmap)
266
+ end
267
+ alias :setPOSmap :NLPIR_SetPOSmap
253
268
 
254
- def NLPIR_NWI_AddFile(sFilename)
255
- NLPIR_NWI_AddFile_rb.call(sFilename)
256
- end
269
+ def NLPIR_NWI_Start()
270
+ NLPIR_NWI_Start_rb.call()
271
+ end
272
+ alias :NWI_start :NLPIR_NWI_Start
257
273
 
258
- def NLPIR_NWI_AddMem(sFilename)
259
- NLPIR_NWI_AddMem_rb.call(sFilename)
260
- end
274
+ def NLPIR_NWI_AddFile(sFilename)
275
+ NLPIR_NWI_AddFile_rb.call(sFilename)
276
+ end
277
+ alias :NWI_addfile :NLPIR_NWI_AddFile
261
278
 
262
- def NLPIR_NWI_Complete()
263
- NLPIR_NWI_Complete_rb.call()
264
- end
279
+ def NLPIR_NWI_AddMem(sFilename)
280
+ NLPIR_NWI_AddMem_rb.call(sFilename)
281
+ end
282
+ alias :NWI_addmem :NLPIR_NWI_AddMem
265
283
 
266
- def NLPIR_NWI_GetResult( bWeightOut = NLPIR_FALSE)
267
- NLPIR_NWI_GetResult_rb.call(bWeightOut)
268
- end
284
+ def NLPIR_NWI_Complete()
285
+ NLPIR_NWI_Complete_rb.call()
286
+ end
287
+ alias :NWI_complete :NLPIR_NWI_Complete
269
288
 
270
- def NLPIR_NWI_Result2UserDict()
271
- NLPIR_NWI_Result2UserDict_rb.call()
272
- end
289
+ def NLPIR_NWI_GetResult( bWeightOut = NLPIR_FALSE)
290
+ NLPIR_NWI_GetResult_rb.call(bWeightOut)
291
+ end
292
+ alias :NWI_result :NLPIR_NWI_GetResult
293
+
294
+ def NLPIR_NWI_Result2UserDict()
295
+ NLPIR_NWI_Result2UserDict_rb.call()
296
+ end
297
+ alias :NWI_result2userdict :NLPIR_NWI_Result2UserDict
273
298
 
274
299
  end