nlpir 0.1.0-x86-linux → 1.0.0-x86-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +160 -23
- data/bin/lib.txt +1738 -0
- data/bin/libNLPIR.so +0 -0
- data/lib/Data/Configure.xml +4 -3
- data/{test/Data/NLPIR.user → lib/Data/DocExtractor.user} +0 -0
- data/lib/Data/English/English.pdat +0 -0
- data/lib/Data/English/English.pos +0 -0
- data/lib/Data/English/English.ung +0 -0
- data/lib/Data/English/English.wordlist +0 -0
- data/lib/Data/English/Irrel2regular.map +0 -0
- data/lib/Data/English/ne.pdat +0 -0
- data/lib/Data/English/ne.pos +0 -0
- data/lib/Data/English/ne.wordlist +0 -0
- data/lib/Data/FTU8.pdat +0 -0
- data/lib/Data/FTU8.wordlist +0 -0
- data/lib/Data/FTU82GBK.map +0 -0
- data/lib/Data/FieldDict.pdat +0 -0
- data/lib/Data/FieldDict.pos +0 -0
- data/lib/Data/GBK2FTU8.map +0 -0
- data/lib/Data/ICTPOS.map +4 -0
- data/lib/Data/NLPIR.user +0 -0
- data/lib/Data/NewWord.lst +26 -0
- data/lib/Data/PKU.map +4 -0
- data/lib/Data/PKU_First.map +4 -0
- data/lib/Data/UserDict.pdat +0 -0
- data/lib/Data/location.map +0 -0
- data/{test/Data/FieldDict.pdat → lib/Data/location.pdat} +0 -0
- data/lib/Data/location.wordlist +0 -0
- data/lib/Data/sentiment.pdat +0 -0
- data/lib/Data/sentiment.ung +0 -0
- data/lib/nlpir.rb +129 -104
- data/lib/nlpir/version.rb +1 -1
- data/nlpir.gemspec +1 -1
- data/test/findnewword.txt +103 -0
- data/test/test_nlpir.rb +137 -133
- data/test/test_result.txt +52 -35
- metadata +32 -90
- data/test/Data/BIG2GBK.map +0 -0
- data/test/Data/BIG5.pdat +0 -0
- data/test/Data/BIG5.wordlist +0 -0
- data/test/Data/BiWord.big +0 -0
- data/test/Data/Configure.xml +0 -15
- data/test/Data/CoreDict.pdat +0 -0
- data/test/Data/CoreDict.pos +0 -0
- data/test/Data/CoreDict.unig +0 -0
- data/test/Data/FieldDict.pos +0 -0
- data/test/Data/GBK.pdat +0 -0
- data/test/Data/GBK.wordlist +0 -0
- data/test/Data/GBK2BIG.map +0 -0
- data/test/Data/GBK2GBKC.map +0 -0
- data/test/Data/GBK2UTF.map +0 -0
- data/test/Data/GBKA.pdat +0 -0
- data/test/Data/GBKA.wordlist +0 -0
- data/test/Data/GBKA2UTF.map +0 -0
- data/test/Data/GBKC.pdat +0 -0
- data/test/Data/GBKC.wordlist +0 -0
- data/test/Data/GBKC2GBK.map +0 -0
- data/test/Data/GranDict.pdat +6 -3371
- data/test/Data/GranDict.pos +0 -0
- data/test/Data/ICTPOS.map +0 -96
- data/test/Data/NLPIR.ctx +0 -0
- data/test/Data/NLPIR_First.map +0 -96
- data/test/Data/NewWord.lst +0 -3
- data/test/Data/PKU.map +0 -96
- data/test/Data/PKU_First.map +0 -96
- data/test/Data/UTF2GBK.map +0 -0
- data/test/Data/UTF2GBKA.map +0 -0
- data/test/Data/UTF8.pdat +0 -0
- data/test/Data/UTF8.wordlist +0 -0
- data/test/Data/UserDict.pdat +0 -0
- data/test/Data/charset.type +0 -0
- data/test/Data/nr.ctx +0 -0
- data/test/Data/nr.fsa +0 -0
- data/test/Data/nr.role +0 -0
data/bin/libNLPIR.so
CHANGED
Binary file
|
data/lib/Data/Configure.xml
CHANGED
@@ -2,13 +2,14 @@
|
|
2
2
|
<NLPIR>
|
3
3
|
<TagSet>ICTPOS.map</TagSet>//���Ա�ע��ӳ���ļ�
|
4
4
|
<UserDict>on</UserDict>//On��UserDictionaryapplied;Off:notapplied��
|
5
|
-
<UserDictPrior>
|
6
|
-
<FieldDict>
|
5
|
+
<UserDictPrior>Off</UserDictPrior>//�û��ʵ�����,Addedin2006-03-16,requiredbyNECOn���û��ʵ�ͺ��Ĵʵ���ͬʱ�еĴʻ㣬�û��ʵ����ȣ������ܲ�Ҫ���ã���������Ĵʵ��еĴʶ�����Ϊ�û��ʵ䣬��Ч���ʵ��䷴
|
6
|
+
<FieldDict>on</FieldDict>//On��FieldDictionaryapplied;Off:notapplied��
|
7
7
|
<GranularityContorl>off</GranularityContorl>
|
8
|
-
<Log>
|
8
|
+
<Log>Off</Log>//On,Off�����磺Off:�ر���־���ܣ�On:����־����
|
9
9
|
<version>2013</version>//ϵͳ�汾��
|
10
10
|
<Modify>2012-11-14</Modify>//ϵͳ�����ʱ��
|
11
11
|
<Lexicon>2012-11-14</Lexicon>//�ʵ������ʱ��
|
12
|
+
<Sentiment>On</Sentiment>//On,Off�����磺Off:�ر���з������ܣ�On:����з�������
|
12
13
|
<adaptive>true</adaptive>//����Ӧ�ִʣ�Ĭ��Ϊfalse������Ӧ�ִʵ�Ч�ʻ�ϵ�
|
13
14
|
<author>�Ż�ƽ��ʿ</author>//����
|
14
15
|
<Contact>pipy_zhang@msn.com</Contact>//������ϵ��ʽ
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
data/lib/Data/FTU8.pdat
ADDED
Binary file
|
Binary file
|
Binary file
|
data/lib/Data/FieldDict.pdat
CHANGED
Binary file
|
data/lib/Data/FieldDict.pos
CHANGED
Binary file
|
Binary file
|
data/lib/Data/ICTPOS.map
CHANGED
data/lib/Data/NLPIR.user
CHANGED
Binary file
|
data/lib/Data/NewWord.lst
CHANGED
@@ -1,2 +1,28 @@
|
|
1
|
+
|
1
2
|
��˿ ���� � ��Q �������Ļ� ���� Ⱥ���Գ� ���ݱ�
|
3
|
+
����ɽ �ΰ������� ���������� �������� ��Ʒ���� ��������
|
4
|
+
�й���ɫ������� ������������ ��ѧ��չ�� ��̬���� ��ṫƽ���� �������� �л�����ΰ���� ���幦���� �����ռ俪�� ��������� ���緢չһ�廯 С����� �������� ��������Ļ�ǿ�� ��ҵ��λ ���� ����ѡ�� �������� ���� ��������г��������� ��������� �������� ȫ������ ��Ҫս�Ի��� ȡ���ش��չ ��̬ϵͳ �Ȳ��ƽ� ���� �ӿ�ת�� ���ʽ���Σ�� ���¶����� ������������ ��������Ƚ��Ļ� ũҵ�ۺ��������� ʳƷҩƷ��ȫ ��۵�����ϵ ����ͬԸ�� �������� ˾������ �������ϱ��� ȫ���л���Ů ս�������˲�ҵ ����Ϸ�Ȩ�� ȫ������ͬ ������������ ��ǰ����ƽ�� �Ļ���ʵ�� ��֤�������� ��������ϵ ȫ��ҽ�� ����ֿ� ����������չ �����Ч �����ƶȸĸ� �������� ����δ�� ʵ�徭�� ��ѧ��ˮƽ ���㵳�� ���Ȼ� ȫ�潨��С����� �������� ��ʳ��ȫ ����ʵ�� ����ɲ� �������� �������� ���ر��� ����ڵ� ��ѧ�ش� �������� ���������� ��������� ��ȫ��в ��������� �����Ļ� ������� ����Ӱ���� �Ծ����� �Ͷ����� �ִ�����ҵ �������� ���˹�ͬ�� ��ʵ�ƽ� �Ҹ����� ���ļ�ֵ�� ���η��� ����������� ������ ƽ�Ȼ��� ������ҡ��� ����� ��Ҫս�Ի����� ת�侭�÷�չ��ʽ �ӿ�ת�侭�÷�չ��ʽ ���������ļ�ֵ��ϵ ����ȡ���ش��չ ʵ���л�����ΰ���� �����ں�ʽ��չ ��������������Ȼ� �ĸ↑�� ά����ṫƽ����
|
2
5
|
��˿ ���� � ��Q �������Ļ� ���� Ⱥ���Գ� ���ݱ�
|
6
|
+
��˿ ���� � ��˿�Ļ� ��Q �������Ļ� Ⱥ���Գ� ���ݱ� ��˧
|
7
|
+
��˿#����#�#��˿�Ļ�#��Q#�������Ļ�#Ⱥ���Գ�#���ݱ�#��˧#
|
8
|
+
|
9
|
+
|
10
|
+
|
11
|
+
|
12
|
+
|
13
|
+
|
14
|
+
������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
|
15
|
+
������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
|
16
|
+
|
17
|
+
|
18
|
+
��˿#����#�#��˿�Ļ�#��Q#�������Ļ�#Ⱥ���Գ�#���ݱ�#
|
19
|
+
������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
|
20
|
+
��˿#����#�#��˿�Ļ�#��Q#�������Ļ�#Ⱥ���Գ�#���ݱ�#
|
21
|
+
������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
|
22
|
+
��˿#����#�#��˿�Ļ�#��Q#�������Ļ�#Ⱥ���Գ�#���ݱ�#
|
23
|
+
������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
|
24
|
+
������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
|
25
|
+
������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
|
26
|
+
������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
|
27
|
+
��˿#����#�#��˿�Ļ�#��Q#�������Ļ�#Ⱥ���Գ�#���ݱ�#
|
28
|
+
��˿#����#�#��˿�Ļ�#��Q#�������Ļ�#Ⱥ���Գ�#���ݱ�#
|
data/lib/Data/PKU.map
CHANGED
data/lib/Data/PKU_First.map
CHANGED
data/lib/Data/UserDict.pdat
CHANGED
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
data/lib/nlpir.rb
CHANGED
@@ -3,7 +3,7 @@ require File.expand_path("../nlpir/version", __FILE__)
|
|
3
3
|
require 'fiddle'
|
4
4
|
require 'fiddle/struct'
|
5
5
|
require 'fiddle/import'
|
6
|
-
require 'fileutils'
|
6
|
+
require 'fileutils'
|
7
7
|
include Fiddle::CParser
|
8
8
|
include Fiddle::Importer
|
9
9
|
|
@@ -14,168 +14,170 @@ module Nlpir
|
|
14
14
|
ICT_POS_MAP_FIRST = 1 #计算所一级标注集
|
15
15
|
ICT_POS_MAP_SECOND = 0 #计算所二级标注集
|
16
16
|
PKU_POS_MAP_SECOND = 2 #北大二级标注集
|
17
|
-
PKU_POS_MAP_FIRST = 3
|
17
|
+
PKU_POS_MAP_FIRST = 3 #北大一级标注集
|
18
18
|
POS_SIZE = 40
|
19
19
|
|
20
20
|
Result_t = struct ['int start','int length',"char sPOS[#{POS_SIZE}]",'int iPOS',
|
21
|
-
'int word_ID','int word_type','
|
22
|
-
|
21
|
+
'int word_ID','int word_type','int weight']
|
22
|
+
|
23
23
|
GBK_CODE = 0 #默认支持GBK编码
|
24
24
|
UTF8_CODE = GBK_CODE + 1 #UTF8编码
|
25
25
|
BIG5_CODE = GBK_CODE + 2 #BIG5编码
|
26
26
|
GBK_FANTI_CODE = GBK_CODE + 3 #GBK编码,里面包含繁体字
|
27
27
|
|
28
|
+
@charset = 'utf-8'
|
28
29
|
|
29
30
|
#提取链接库接口
|
30
31
|
libm = Fiddle.dlopen(File.expand_path("../../bin/libNLPIR.so", __FILE__))
|
31
32
|
|
32
33
|
NLPIR_Init_rb = Fiddle::Function.new(
|
33
|
-
libm['
|
34
|
+
libm['NLPIR_Init'],
|
34
35
|
[Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT],
|
35
36
|
Fiddle::TYPE_INT
|
36
37
|
)
|
37
38
|
NLPIR_Exit_rb = Fiddle::Function.new(
|
38
|
-
libm['
|
39
|
+
libm['NLPIR_Exit'],
|
39
40
|
[],
|
40
41
|
Fiddle::TYPE_INT
|
41
42
|
)
|
42
43
|
NLPIR_ImportUserDict_rb = Fiddle::Function.new(
|
43
|
-
libm['
|
44
|
+
libm['NLPIR_ImportUserDict'],
|
44
45
|
[Fiddle::TYPE_VOIDP],
|
45
46
|
Fiddle::TYPE_INT
|
46
47
|
)
|
47
48
|
NLPIR_ParagraphProcess_rb = Fiddle::Function.new(
|
48
|
-
libm['
|
49
|
+
libm['NLPIR_ParagraphProcess'],
|
49
50
|
[Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT],
|
50
51
|
Fiddle::TYPE_VOIDP
|
51
52
|
)
|
52
53
|
NLPIR_ParagraphProcessA_rb = Fiddle::Function.new(
|
53
|
-
libm['
|
54
|
+
libm['NLPIR_ParagraphProcessA'],
|
54
55
|
[Fiddle::TYPE_VOIDP,Fiddle::TYPE_VOIDP],
|
55
56
|
Fiddle::TYPE_VOIDP
|
56
57
|
)
|
57
58
|
NLPIR_FileProcess_rb = Fiddle::Function.new(
|
58
|
-
libm['
|
59
|
+
libm['NLPIR_FileProcess'],
|
59
60
|
[Fiddle::TYPE_VOIDP,Fiddle::TYPE_VOIDP, Fiddle::TYPE_INT],
|
60
61
|
Fiddle::TYPE_DOUBLE
|
61
62
|
)
|
62
63
|
NLPIR_GetParagraphProcessAWordCount_rb = Fiddle::Function.new(
|
63
|
-
libm['
|
64
|
+
libm['NLPIR_GetParagraphProcessAWordCount'],
|
64
65
|
[Fiddle::TYPE_VOIDP],
|
65
66
|
Fiddle::TYPE_INT
|
66
67
|
)
|
67
68
|
NLPIR_ParagraphProcessAW_rb = Fiddle::Function.new(
|
68
|
-
libm['
|
69
|
+
libm['NLPIR_ParagraphProcessAW'],
|
69
70
|
[Fiddle::TYPE_INT,Fiddle::TYPE_VOIDP],
|
70
71
|
Fiddle::TYPE_INT
|
71
72
|
)
|
72
73
|
NLPIR_AddUserWord_rb = Fiddle::Function.new(
|
73
|
-
libm['
|
74
|
+
libm['NLPIR_AddUserWord'],
|
74
75
|
[Fiddle::TYPE_VOIDP],
|
75
76
|
Fiddle::TYPE_INT
|
76
77
|
)
|
77
78
|
NLPIR_SaveTheUsrDic_rb = Fiddle::Function.new(
|
78
|
-
libm['
|
79
|
+
libm['NLPIR_SaveTheUsrDic'],
|
79
80
|
[],
|
80
81
|
Fiddle::TYPE_INT
|
81
82
|
)
|
82
83
|
NLPIR_DelUsrWord_rb = Fiddle::Function.new(
|
83
|
-
libm['
|
84
|
+
libm['NLPIR_DelUsrWord'],
|
84
85
|
[Fiddle::TYPE_VOIDP],
|
85
86
|
Fiddle::TYPE_INT
|
86
87
|
)
|
87
88
|
NLPIR_GetKeyWords_rb = Fiddle::Function.new(
|
88
|
-
libm['
|
89
|
+
libm['NLPIR_GetKeyWords'],
|
89
90
|
[Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT,Fiddle::TYPE_INT],
|
90
91
|
Fiddle::TYPE_VOIDP
|
91
92
|
)
|
92
93
|
NLPIR_GetFileKeyWords_rb = Fiddle::Function.new(
|
93
|
-
libm['
|
94
|
+
libm['NLPIR_GetFileKeyWords'],
|
94
95
|
[Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT,Fiddle::TYPE_INT],
|
95
96
|
Fiddle::TYPE_VOIDP
|
96
97
|
)
|
97
98
|
NLPIR_GetNewWords_rb = Fiddle::Function.new(
|
98
|
-
libm['
|
99
|
+
libm['NLPIR_GetNewWords'],
|
99
100
|
[Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT,Fiddle::TYPE_INT],
|
100
101
|
Fiddle::TYPE_VOIDP
|
101
102
|
)
|
102
103
|
NLPIR_GetFileNewWords_rb = Fiddle::Function.new(
|
103
|
-
libm['
|
104
|
+
libm['NLPIR_GetFileNewWords'],
|
104
105
|
[Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT,Fiddle::TYPE_INT],
|
105
106
|
Fiddle::TYPE_VOIDP
|
106
107
|
)
|
107
108
|
NLPIR_FingerPrint_rb = Fiddle::Function.new(
|
108
|
-
libm['
|
109
|
+
libm['NLPIR_FingerPrint'],
|
109
110
|
[Fiddle::TYPE_VOIDP],
|
110
111
|
Fiddle::TYPE_LONG
|
111
112
|
)
|
112
113
|
NLPIR_SetPOSmap_rb = Fiddle::Function.new(
|
113
|
-
libm['
|
114
|
+
libm['NLPIR_SetPOSmap'],
|
114
115
|
[Fiddle::TYPE_INT],
|
115
116
|
Fiddle::TYPE_INT
|
116
117
|
)
|
117
118
|
|
118
119
|
NLPIR_NWI_Start_rb = Fiddle::Function.new(
|
119
|
-
libm['
|
120
|
+
libm['NLPIR_NWI_Start'],
|
120
121
|
[],
|
121
122
|
Fiddle::TYPE_INT
|
122
123
|
)
|
123
124
|
NLPIR_NWI_AddFile_rb = Fiddle::Function.new(
|
124
|
-
libm['
|
125
|
+
libm['NLPIR_NWI_AddFile'],
|
125
126
|
[Fiddle::TYPE_VOIDP],
|
126
127
|
Fiddle::TYPE_INT
|
127
128
|
)
|
128
129
|
NLPIR_NWI_AddMem_rb = Fiddle::Function.new(
|
129
|
-
libm['
|
130
|
+
libm['NLPIR_NWI_AddMem'],
|
130
131
|
[Fiddle::TYPE_VOIDP],
|
131
132
|
Fiddle::TYPE_INT
|
132
133
|
)
|
133
134
|
NLPIR_NWI_Complete_rb = Fiddle::Function.new(
|
134
|
-
libm['
|
135
|
+
libm['NLPIR_NWI_Complete'],
|
135
136
|
[],
|
136
137
|
Fiddle::TYPE_INT
|
137
138
|
)
|
138
139
|
NLPIR_NWI_GetResult_rb = Fiddle::Function.new(
|
139
|
-
libm['
|
140
|
+
libm['NLPIR_NWI_GetResult'],
|
140
141
|
[Fiddle::TYPE_INT],
|
141
142
|
Fiddle::TYPE_VOIDP
|
142
143
|
)
|
143
144
|
NLPIR_NWI_Result2UserDict_rb = Fiddle::Function.new(
|
144
|
-
libm['
|
145
|
+
libm['NLPIR_NWI_Result2UserDict'],
|
145
146
|
[],
|
146
147
|
Fiddle::TYPE_VOIDP
|
147
148
|
)
|
148
149
|
|
149
150
|
#--函数
|
150
151
|
|
151
|
-
def NLPIR_Init(sInitDirPath=nil , encoding=UTF8_CODE
|
152
|
-
|
153
|
-
if File.exist?(
|
154
|
-
FileUtils.mkdir(
|
152
|
+
def NLPIR_Init(sInitDirPath=nil , encoding=UTF8_CODE)
|
153
|
+
sInitDirPath += "/Data/"
|
154
|
+
if File.exist?(sInitDirPath)==false
|
155
|
+
FileUtils.mkdir(sInitDirPath)
|
155
156
|
filemother = File.expand_path("../Data/", __FILE__)
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
NLPIR_Init_rb.call(sInitDirPath,encoding)
|
165
|
-
|
157
|
+
FileUtils.copy_entry filemother,sInitDirPath
|
158
|
+
end
|
159
|
+
@charset = 'gbk' if encoding == GBK_CODE
|
160
|
+
@charset = 'utf-8' if encoding == UTF8_CODE
|
161
|
+
@charset = 'big5' if encoding == BIG5_CODE
|
162
|
+
@charset = 'gbk' if encoding == GBK_FANTI_CODE
|
163
|
+
NLPIR_Init_rb.call(nil,encoding)
|
166
164
|
end
|
165
|
+
alias :nlpir_init :NLPIR_Init
|
167
166
|
|
168
167
|
def NLPIR_Exit()
|
169
168
|
NLPIR_Exit_rb.call()
|
170
169
|
end
|
170
|
+
alias :nlpir_exit :NLPIR_Exit
|
171
171
|
|
172
172
|
def NLPIR_ImportUserDict(sFilename)
|
173
173
|
NLPIR_ImportUserDict_rb.call(sFilename)
|
174
174
|
end
|
175
|
+
alias :import_userdict :NLPIR_ImportUserDict
|
175
176
|
|
176
177
|
def NLPIR_ParagraphProcess(sParagraph, bPOStagged=NLPIR_TRUE)
|
177
|
-
NLPIR_ParagraphProcess_rb.call(sParagraph, bPOStagged).to_s
|
178
|
+
NLPIR_ParagraphProcess_rb.call(sParagraph, bPOStagged).to_s.force_encoding(@charset)
|
178
179
|
end
|
180
|
+
alias :text_proc :NLPIR_ParagraphProcess
|
179
181
|
|
180
182
|
def NLPIR_ParagraphProcessA(sParagraph)
|
181
183
|
resultCount = NLPIR_GetParagraphProcessAWordCount(sParagraph)
|
@@ -189,86 +191,109 @@ module Nlpir
|
|
189
191
|
end
|
190
192
|
return words_list
|
191
193
|
end
|
194
|
+
alias :text_procA :NLPIR_ParagraphProcessA
|
192
195
|
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
+
def NLPIR_GetParagraphProcessAWordCount(sParagraph)
|
197
|
+
NLPIR_GetParagraphProcessAWordCount_rb.call(sParagraph)
|
198
|
+
end
|
199
|
+
alias :text_wordcount :NLPIR_GetParagraphProcessAWordCount
|
196
200
|
|
197
|
-
|
198
|
-
|
199
|
-
|
201
|
+
def NLPIR_FileProcess(sSourceFilename, sResultFilename, bPOStagged=NLPIR_TRUE)
|
202
|
+
NLPIR_FileProcess_rb.call(sSourceFilename, sResultFilename, bPOStagged)
|
203
|
+
end
|
204
|
+
alias :file_proc :NLPIR_FileProcess
|
200
205
|
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
return words_list
|
206
|
+
|
207
|
+
def NLPIR_ParagraphProcessAW(sParagraph)
|
208
|
+
free = Fiddle::Function.new(Fiddle::RUBY_FREE, [TYPE_VOIDP], TYPE_VOID)
|
209
|
+
resultCount = NLPIR_GetParagraphProcessAWordCount(sParagraph)
|
210
|
+
pVecResult = Pointer.malloc(Result_t.size*resultCount,free)
|
211
|
+
NLPIR_ParagraphProcessAW_rb.call(resultCount,pVecResult)
|
212
|
+
words_list = []
|
213
|
+
words_list << Result_t.new(pVecResult)
|
214
|
+
for i in 1...resultCount do
|
215
|
+
words_list << Result_t.new(pVecResult+=Result_t.size)
|
212
216
|
end
|
217
|
+
return words_list
|
218
|
+
end
|
219
|
+
alias :text_procAW :NLPIR_ParagraphProcessAW
|
213
220
|
|
214
|
-
def NLPIR_AddUserWord(sWord)
|
215
|
-
NLPIR_AddUserWord_rb.call(sWord)
|
216
|
-
end
|
217
221
|
|
218
|
-
|
219
|
-
|
220
|
-
|
222
|
+
def NLPIR_AddUserWord(sWord)
|
223
|
+
NLPIR_AddUserWord_rb.call(sWord)
|
224
|
+
end
|
225
|
+
alias :add_userword :NLPIR_AddUserWord
|
221
226
|
|
222
|
-
|
223
|
-
|
224
|
-
|
227
|
+
def NLPIR_SaveTheUsrDic()
|
228
|
+
NLPIR_SaveTheUsrDic_rb.call()
|
229
|
+
end
|
230
|
+
alias :save_userdict :NLPIR_SaveTheUsrDic
|
225
231
|
|
226
|
-
|
227
|
-
|
228
|
-
|
232
|
+
def NLPIR_DelUsrWord(sWord)
|
233
|
+
NLPIR_DelUsrWord_rb.call(sWord)
|
234
|
+
end
|
235
|
+
alias :del_userword :NLPIR_DelUsrWord
|
229
236
|
|
230
|
-
|
231
|
-
|
232
|
-
|
237
|
+
def NLPIR_GetKeyWords(sLine, nMaxKeyLimit=50, bWeightOut=NLPIR_FALSE)
|
238
|
+
NLPIR_GetKeyWords_rb.call(sLine, nMaxKeyLimit, bWeightOut).to_s.force_encoding(@charset)
|
239
|
+
end
|
240
|
+
alias :text_keywords :NLPIR_GetKeyWords
|
233
241
|
|
234
|
-
|
235
|
-
|
236
|
-
|
242
|
+
def NLPIR_GetFileKeyWords(sTextFile, nMaxKeyLimit=50, bWeightOut=NLPIR_FALSE)
|
243
|
+
line = NLPIR_GetFileKeyWords_rb.call(sTextFile, nMaxKeyLimit, bWeightOut).to_s
|
244
|
+
line.force_encoding('gbk')
|
245
|
+
line.encode!(@charset)
|
246
|
+
end
|
247
|
+
alias :file_keywords :NLPIR_GetFileKeyWords
|
237
248
|
|
238
|
-
|
239
|
-
|
240
|
-
|
249
|
+
def NLPIR_GetNewWords(sLine, nMaxKeyLimit=50, bWeightOut=NLPIR_FALSE)
|
250
|
+
NLPIR_GetNewWords_rb.call(sLine, nMaxKeyLimit, bWeightOut).to_s.force_encoding(@charset)
|
251
|
+
end
|
252
|
+
alias :text_newwords :NLPIR_GetNewWords
|
241
253
|
|
242
|
-
|
243
|
-
|
244
|
-
|
254
|
+
def NLPIR_GetFileNewWords(sTextFile, nMaxKeyLimit=50, bWeightOut=NLPIR_FALSE)
|
255
|
+
NLPIR_GetFileNewWords_rb.call(sTextFile, nMaxKeyLimit, bWeightOut).to_s.force_encoding(@charset)
|
256
|
+
end
|
257
|
+
alias :file_newwords :NLPIR_GetFileNewWords
|
245
258
|
|
246
|
-
|
247
|
-
|
248
|
-
|
259
|
+
def NLPIR_FingerPrint(sLine)
|
260
|
+
NLPIR_FingerPrint_rb.call(sLine)
|
261
|
+
end
|
262
|
+
alias :text_fingerprint :NLPIR_FingerPrint
|
249
263
|
|
250
|
-
|
251
|
-
|
252
|
-
|
264
|
+
def NLPIR_SetPOSmap(nPOSmap)
|
265
|
+
NLPIR_SetPOSmap_rb.call(nPOSmap)
|
266
|
+
end
|
267
|
+
alias :setPOSmap :NLPIR_SetPOSmap
|
253
268
|
|
254
|
-
|
255
|
-
|
256
|
-
|
269
|
+
def NLPIR_NWI_Start()
|
270
|
+
NLPIR_NWI_Start_rb.call()
|
271
|
+
end
|
272
|
+
alias :NWI_start :NLPIR_NWI_Start
|
257
273
|
|
258
|
-
|
259
|
-
|
260
|
-
|
274
|
+
def NLPIR_NWI_AddFile(sFilename)
|
275
|
+
NLPIR_NWI_AddFile_rb.call(sFilename)
|
276
|
+
end
|
277
|
+
alias :NWI_addfile :NLPIR_NWI_AddFile
|
261
278
|
|
262
|
-
|
263
|
-
|
264
|
-
|
279
|
+
def NLPIR_NWI_AddMem(sFilename)
|
280
|
+
NLPIR_NWI_AddMem_rb.call(sFilename)
|
281
|
+
end
|
282
|
+
alias :NWI_addmem :NLPIR_NWI_AddMem
|
265
283
|
|
266
|
-
|
267
|
-
|
268
|
-
|
284
|
+
def NLPIR_NWI_Complete()
|
285
|
+
NLPIR_NWI_Complete_rb.call()
|
286
|
+
end
|
287
|
+
alias :NWI_complete :NLPIR_NWI_Complete
|
269
288
|
|
270
|
-
|
271
|
-
|
272
|
-
|
289
|
+
def NLPIR_NWI_GetResult( bWeightOut = NLPIR_FALSE)
|
290
|
+
NLPIR_NWI_GetResult_rb.call(bWeightOut)
|
291
|
+
end
|
292
|
+
alias :NWI_result :NLPIR_NWI_GetResult
|
293
|
+
|
294
|
+
def NLPIR_NWI_Result2UserDict()
|
295
|
+
NLPIR_NWI_Result2UserDict_rb.call()
|
296
|
+
end
|
297
|
+
alias :NWI_result2userdict :NLPIR_NWI_Result2UserDict
|
273
298
|
|
274
299
|
end
|