nlpir 0.1.0-x86-linux → 1.0.0-x86-linux
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +160 -23
- data/bin/lib.txt +1738 -0
- data/bin/libNLPIR.so +0 -0
- data/lib/Data/Configure.xml +4 -3
- data/{test/Data/NLPIR.user → lib/Data/DocExtractor.user} +0 -0
- data/lib/Data/English/English.pdat +0 -0
- data/lib/Data/English/English.pos +0 -0
- data/lib/Data/English/English.ung +0 -0
- data/lib/Data/English/English.wordlist +0 -0
- data/lib/Data/English/Irrel2regular.map +0 -0
- data/lib/Data/English/ne.pdat +0 -0
- data/lib/Data/English/ne.pos +0 -0
- data/lib/Data/English/ne.wordlist +0 -0
- data/lib/Data/FTU8.pdat +0 -0
- data/lib/Data/FTU8.wordlist +0 -0
- data/lib/Data/FTU82GBK.map +0 -0
- data/lib/Data/FieldDict.pdat +0 -0
- data/lib/Data/FieldDict.pos +0 -0
- data/lib/Data/GBK2FTU8.map +0 -0
- data/lib/Data/ICTPOS.map +4 -0
- data/lib/Data/NLPIR.user +0 -0
- data/lib/Data/NewWord.lst +26 -0
- data/lib/Data/PKU.map +4 -0
- data/lib/Data/PKU_First.map +4 -0
- data/lib/Data/UserDict.pdat +0 -0
- data/lib/Data/location.map +0 -0
- data/{test/Data/FieldDict.pdat → lib/Data/location.pdat} +0 -0
- data/lib/Data/location.wordlist +0 -0
- data/lib/Data/sentiment.pdat +0 -0
- data/lib/Data/sentiment.ung +0 -0
- data/lib/nlpir.rb +129 -104
- data/lib/nlpir/version.rb +1 -1
- data/nlpir.gemspec +1 -1
- data/test/findnewword.txt +103 -0
- data/test/test_nlpir.rb +137 -133
- data/test/test_result.txt +52 -35
- metadata +32 -90
- data/test/Data/BIG2GBK.map +0 -0
- data/test/Data/BIG5.pdat +0 -0
- data/test/Data/BIG5.wordlist +0 -0
- data/test/Data/BiWord.big +0 -0
- data/test/Data/Configure.xml +0 -15
- data/test/Data/CoreDict.pdat +0 -0
- data/test/Data/CoreDict.pos +0 -0
- data/test/Data/CoreDict.unig +0 -0
- data/test/Data/FieldDict.pos +0 -0
- data/test/Data/GBK.pdat +0 -0
- data/test/Data/GBK.wordlist +0 -0
- data/test/Data/GBK2BIG.map +0 -0
- data/test/Data/GBK2GBKC.map +0 -0
- data/test/Data/GBK2UTF.map +0 -0
- data/test/Data/GBKA.pdat +0 -0
- data/test/Data/GBKA.wordlist +0 -0
- data/test/Data/GBKA2UTF.map +0 -0
- data/test/Data/GBKC.pdat +0 -0
- data/test/Data/GBKC.wordlist +0 -0
- data/test/Data/GBKC2GBK.map +0 -0
- data/test/Data/GranDict.pdat +6 -3371
- data/test/Data/GranDict.pos +0 -0
- data/test/Data/ICTPOS.map +0 -96
- data/test/Data/NLPIR.ctx +0 -0
- data/test/Data/NLPIR_First.map +0 -96
- data/test/Data/NewWord.lst +0 -3
- data/test/Data/PKU.map +0 -96
- data/test/Data/PKU_First.map +0 -96
- data/test/Data/UTF2GBK.map +0 -0
- data/test/Data/UTF2GBKA.map +0 -0
- data/test/Data/UTF8.pdat +0 -0
- data/test/Data/UTF8.wordlist +0 -0
- data/test/Data/UserDict.pdat +0 -0
- data/test/Data/charset.type +0 -0
- data/test/Data/nr.ctx +0 -0
- data/test/Data/nr.fsa +0 -0
- data/test/Data/nr.role +0 -0
data/bin/libNLPIR.so
CHANGED
Binary file
|
data/lib/Data/Configure.xml
CHANGED
@@ -2,13 +2,14 @@
|
|
2
2
|
<NLPIR>
|
3
3
|
<TagSet>ICTPOS.map</TagSet>//���Ա�ע��ӳ���ļ�
|
4
4
|
<UserDict>on</UserDict>//On��UserDictionaryapplied;Off:notapplied��
|
5
|
-
<UserDictPrior>
|
6
|
-
<FieldDict>
|
5
|
+
<UserDictPrior>Off</UserDictPrior>//�û��ʵ�����,Addedin2006-03-16,requiredbyNECOn���û��ʵ�ͺ��Ĵʵ���ͬʱ�еĴʻ㣬�û��ʵ����ȣ������ܲ�Ҫ���ã���������Ĵʵ��еĴʶ�����Ϊ�û��ʵ䣬��Ч���ʵ��䷴
|
6
|
+
<FieldDict>on</FieldDict>//On��FieldDictionaryapplied;Off:notapplied��
|
7
7
|
<GranularityContorl>off</GranularityContorl>
|
8
|
-
<Log>
|
8
|
+
<Log>Off</Log>//On,Off�����磺Off:�ر���־���ܣ�On:����־����
|
9
9
|
<version>2013</version>//ϵͳ�汾��
|
10
10
|
<Modify>2012-11-14</Modify>//ϵͳ�����ʱ��
|
11
11
|
<Lexicon>2012-11-14</Lexicon>//�ʵ������ʱ��
|
12
|
+
<Sentiment>On</Sentiment>//On,Off�����磺Off:�ر���з������ܣ�On:����з�������
|
12
13
|
<adaptive>true</adaptive>//����Ӧ�ִʣ�Ĭ��Ϊfalse������Ӧ�ִʵ�Ч�ʻ�ϵ�
|
13
14
|
<author>�Ż�ƽ��ʿ</author>//����
|
14
15
|
<Contact>pipy_zhang@msn.com</Contact>//������ϵ��ʽ
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
data/lib/Data/FTU8.pdat
ADDED
Binary file
|
Binary file
|
Binary file
|
data/lib/Data/FieldDict.pdat
CHANGED
Binary file
|
data/lib/Data/FieldDict.pos
CHANGED
Binary file
|
Binary file
|
data/lib/Data/ICTPOS.map
CHANGED
data/lib/Data/NLPIR.user
CHANGED
Binary file
|
data/lib/Data/NewWord.lst
CHANGED
@@ -1,2 +1,28 @@
|
|
1
|
+
|
1
2
|
��˿ ���� � ��Q �������Ļ� ���� Ⱥ���Գ� ���ݱ�
|
3
|
+
����ɽ �ΰ������� ���������� �������� ��Ʒ���� ��������
|
4
|
+
�й���ɫ������� ������������ ��ѧ��չ�� ��̬���� ��ṫƽ���� �������� �л�����ΰ���� ���幦���� �����ռ俪�� ��������� ���緢չһ�廯 С����� �������� ��������Ļ�ǿ�� ��ҵ��λ ���� ����ѡ�� �������� ���� ��������г��������� ��������� �������� ȫ������ ��Ҫս�Ի��� ȡ���ش��չ ��̬ϵͳ �Ȳ��ƽ� ���� �ӿ�ת�� ���ʽ���Σ�� ���¶����� ������������ ��������Ƚ��Ļ� ũҵ�ۺ��������� ʳƷҩƷ��ȫ ��۵�����ϵ ����ͬԸ�� �������� ˾������ �������ϱ��� ȫ���л���Ů ս�������˲�ҵ ����Ϸ�Ȩ�� ȫ������ͬ ������������ ��ǰ����ƽ�� �Ļ���ʵ�� ��֤�������� ��������ϵ ȫ��ҽ�� ����ֿ� ����������չ �����Ч �����ƶȸĸ� �������� ����δ�� ʵ�徭�� ��ѧ��ˮƽ ���㵳�� ���Ȼ� ȫ�潨��С����� �������� ��ʳ��ȫ ����ʵ�� ����ɲ� �������� �������� ���ر��� ����ڵ� ��ѧ�ش� �������� ���������� ��������� ��ȫ��в ��������� �����Ļ� ������� ����Ӱ���� �Ծ����� �Ͷ����� �ִ�����ҵ �������� ���˹�ͬ�� ��ʵ�ƽ� �Ҹ����� ���ļ�ֵ�� ���η��� ����������� ������ ƽ�Ȼ��� ������ҡ��� ����� ��Ҫս�Ի����� ת�侭�÷�չ��ʽ �ӿ�ת�侭�÷�չ��ʽ ���������ļ�ֵ��ϵ ����ȡ���ش��չ ʵ���л�����ΰ���� �����ں�ʽ��չ ��������������Ȼ� �ĸ↑�� ά����ṫƽ����
|
2
5
|
��˿ ���� � ��Q �������Ļ� ���� Ⱥ���Գ� ���ݱ�
|
6
|
+
��˿ ���� � ��˿�Ļ� ��Q �������Ļ� Ⱥ���Գ� ���ݱ� ��˧
|
7
|
+
��˿#����#�#��˿�Ļ�#��Q#�������Ļ�#Ⱥ���Գ�#���ݱ�#��˧#
|
8
|
+
|
9
|
+
|
10
|
+
|
11
|
+
|
12
|
+
|
13
|
+
|
14
|
+
������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
|
15
|
+
������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
|
16
|
+
|
17
|
+
|
18
|
+
��˿#����#�#��˿�Ļ�#��Q#�������Ļ�#Ⱥ���Գ�#���ݱ�#
|
19
|
+
������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
|
20
|
+
��˿#����#�#��˿�Ļ�#��Q#�������Ļ�#Ⱥ���Գ�#���ݱ�#
|
21
|
+
������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
|
22
|
+
��˿#����#�#��˿�Ļ�#��Q#�������Ļ�#Ⱥ���Գ�#���ݱ�#
|
23
|
+
������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
|
24
|
+
������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
|
25
|
+
������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
|
26
|
+
������������#��Ӫ�Թ�Ĺ#�������Թ�Ĺ#�����������#�������#�����Թ�Ĺ����#��������#�����Թ�Ĺ#���й�Ĺ#��������г�#������������#������������#���ᱩ��#�����г�#��������#������Ĺ#��չˮƽ#�����ƶ�#������ҵ#�ʽ�Ͷ��#������������#���ڱ���#������#�ؼ�����#��Ĺ�۸�#�������#���Ƿ���#����Ĺ��#���ⲹ��#ɥ����Ʒ#������Ʒ#�ǻҼĴ�#�������Թ�Ĺ����#
|
27
|
+
��˿#����#�#��˿�Ļ�#��Q#�������Ļ�#Ⱥ���Գ�#���ݱ�#
|
28
|
+
��˿#����#�#��˿�Ļ�#��Q#�������Ļ�#Ⱥ���Գ�#���ݱ�#
|
data/lib/Data/PKU.map
CHANGED
data/lib/Data/PKU_First.map
CHANGED
data/lib/Data/UserDict.pdat
CHANGED
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
data/lib/nlpir.rb
CHANGED
@@ -3,7 +3,7 @@ require File.expand_path("../nlpir/version", __FILE__)
|
|
3
3
|
require 'fiddle'
|
4
4
|
require 'fiddle/struct'
|
5
5
|
require 'fiddle/import'
|
6
|
-
require 'fileutils'
|
6
|
+
require 'fileutils'
|
7
7
|
include Fiddle::CParser
|
8
8
|
include Fiddle::Importer
|
9
9
|
|
@@ -14,168 +14,170 @@ module Nlpir
|
|
14
14
|
ICT_POS_MAP_FIRST = 1 #计算所一级标注集
|
15
15
|
ICT_POS_MAP_SECOND = 0 #计算所二级标注集
|
16
16
|
PKU_POS_MAP_SECOND = 2 #北大二级标注集
|
17
|
-
PKU_POS_MAP_FIRST = 3
|
17
|
+
PKU_POS_MAP_FIRST = 3 #北大一级标注集
|
18
18
|
POS_SIZE = 40
|
19
19
|
|
20
20
|
Result_t = struct ['int start','int length',"char sPOS[#{POS_SIZE}]",'int iPOS',
|
21
|
-
'int word_ID','int word_type','
|
22
|
-
|
21
|
+
'int word_ID','int word_type','int weight']
|
22
|
+
|
23
23
|
GBK_CODE = 0 #默认支持GBK编码
|
24
24
|
UTF8_CODE = GBK_CODE + 1 #UTF8编码
|
25
25
|
BIG5_CODE = GBK_CODE + 2 #BIG5编码
|
26
26
|
GBK_FANTI_CODE = GBK_CODE + 3 #GBK编码,里面包含繁体字
|
27
27
|
|
28
|
+
@charset = 'utf-8'
|
28
29
|
|
29
30
|
#提取链接库接口
|
30
31
|
libm = Fiddle.dlopen(File.expand_path("../../bin/libNLPIR.so", __FILE__))
|
31
32
|
|
32
33
|
NLPIR_Init_rb = Fiddle::Function.new(
|
33
|
-
libm['
|
34
|
+
libm['NLPIR_Init'],
|
34
35
|
[Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT],
|
35
36
|
Fiddle::TYPE_INT
|
36
37
|
)
|
37
38
|
NLPIR_Exit_rb = Fiddle::Function.new(
|
38
|
-
libm['
|
39
|
+
libm['NLPIR_Exit'],
|
39
40
|
[],
|
40
41
|
Fiddle::TYPE_INT
|
41
42
|
)
|
42
43
|
NLPIR_ImportUserDict_rb = Fiddle::Function.new(
|
43
|
-
libm['
|
44
|
+
libm['NLPIR_ImportUserDict'],
|
44
45
|
[Fiddle::TYPE_VOIDP],
|
45
46
|
Fiddle::TYPE_INT
|
46
47
|
)
|
47
48
|
NLPIR_ParagraphProcess_rb = Fiddle::Function.new(
|
48
|
-
libm['
|
49
|
+
libm['NLPIR_ParagraphProcess'],
|
49
50
|
[Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT],
|
50
51
|
Fiddle::TYPE_VOIDP
|
51
52
|
)
|
52
53
|
NLPIR_ParagraphProcessA_rb = Fiddle::Function.new(
|
53
|
-
libm['
|
54
|
+
libm['NLPIR_ParagraphProcessA'],
|
54
55
|
[Fiddle::TYPE_VOIDP,Fiddle::TYPE_VOIDP],
|
55
56
|
Fiddle::TYPE_VOIDP
|
56
57
|
)
|
57
58
|
NLPIR_FileProcess_rb = Fiddle::Function.new(
|
58
|
-
libm['
|
59
|
+
libm['NLPIR_FileProcess'],
|
59
60
|
[Fiddle::TYPE_VOIDP,Fiddle::TYPE_VOIDP, Fiddle::TYPE_INT],
|
60
61
|
Fiddle::TYPE_DOUBLE
|
61
62
|
)
|
62
63
|
NLPIR_GetParagraphProcessAWordCount_rb = Fiddle::Function.new(
|
63
|
-
libm['
|
64
|
+
libm['NLPIR_GetParagraphProcessAWordCount'],
|
64
65
|
[Fiddle::TYPE_VOIDP],
|
65
66
|
Fiddle::TYPE_INT
|
66
67
|
)
|
67
68
|
NLPIR_ParagraphProcessAW_rb = Fiddle::Function.new(
|
68
|
-
libm['
|
69
|
+
libm['NLPIR_ParagraphProcessAW'],
|
69
70
|
[Fiddle::TYPE_INT,Fiddle::TYPE_VOIDP],
|
70
71
|
Fiddle::TYPE_INT
|
71
72
|
)
|
72
73
|
NLPIR_AddUserWord_rb = Fiddle::Function.new(
|
73
|
-
libm['
|
74
|
+
libm['NLPIR_AddUserWord'],
|
74
75
|
[Fiddle::TYPE_VOIDP],
|
75
76
|
Fiddle::TYPE_INT
|
76
77
|
)
|
77
78
|
NLPIR_SaveTheUsrDic_rb = Fiddle::Function.new(
|
78
|
-
libm['
|
79
|
+
libm['NLPIR_SaveTheUsrDic'],
|
79
80
|
[],
|
80
81
|
Fiddle::TYPE_INT
|
81
82
|
)
|
82
83
|
NLPIR_DelUsrWord_rb = Fiddle::Function.new(
|
83
|
-
libm['
|
84
|
+
libm['NLPIR_DelUsrWord'],
|
84
85
|
[Fiddle::TYPE_VOIDP],
|
85
86
|
Fiddle::TYPE_INT
|
86
87
|
)
|
87
88
|
NLPIR_GetKeyWords_rb = Fiddle::Function.new(
|
88
|
-
libm['
|
89
|
+
libm['NLPIR_GetKeyWords'],
|
89
90
|
[Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT,Fiddle::TYPE_INT],
|
90
91
|
Fiddle::TYPE_VOIDP
|
91
92
|
)
|
92
93
|
NLPIR_GetFileKeyWords_rb = Fiddle::Function.new(
|
93
|
-
libm['
|
94
|
+
libm['NLPIR_GetFileKeyWords'],
|
94
95
|
[Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT,Fiddle::TYPE_INT],
|
95
96
|
Fiddle::TYPE_VOIDP
|
96
97
|
)
|
97
98
|
NLPIR_GetNewWords_rb = Fiddle::Function.new(
|
98
|
-
libm['
|
99
|
+
libm['NLPIR_GetNewWords'],
|
99
100
|
[Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT,Fiddle::TYPE_INT],
|
100
101
|
Fiddle::TYPE_VOIDP
|
101
102
|
)
|
102
103
|
NLPIR_GetFileNewWords_rb = Fiddle::Function.new(
|
103
|
-
libm['
|
104
|
+
libm['NLPIR_GetFileNewWords'],
|
104
105
|
[Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT,Fiddle::TYPE_INT],
|
105
106
|
Fiddle::TYPE_VOIDP
|
106
107
|
)
|
107
108
|
NLPIR_FingerPrint_rb = Fiddle::Function.new(
|
108
|
-
libm['
|
109
|
+
libm['NLPIR_FingerPrint'],
|
109
110
|
[Fiddle::TYPE_VOIDP],
|
110
111
|
Fiddle::TYPE_LONG
|
111
112
|
)
|
112
113
|
NLPIR_SetPOSmap_rb = Fiddle::Function.new(
|
113
|
-
libm['
|
114
|
+
libm['NLPIR_SetPOSmap'],
|
114
115
|
[Fiddle::TYPE_INT],
|
115
116
|
Fiddle::TYPE_INT
|
116
117
|
)
|
117
118
|
|
118
119
|
NLPIR_NWI_Start_rb = Fiddle::Function.new(
|
119
|
-
libm['
|
120
|
+
libm['NLPIR_NWI_Start'],
|
120
121
|
[],
|
121
122
|
Fiddle::TYPE_INT
|
122
123
|
)
|
123
124
|
NLPIR_NWI_AddFile_rb = Fiddle::Function.new(
|
124
|
-
libm['
|
125
|
+
libm['NLPIR_NWI_AddFile'],
|
125
126
|
[Fiddle::TYPE_VOIDP],
|
126
127
|
Fiddle::TYPE_INT
|
127
128
|
)
|
128
129
|
NLPIR_NWI_AddMem_rb = Fiddle::Function.new(
|
129
|
-
libm['
|
130
|
+
libm['NLPIR_NWI_AddMem'],
|
130
131
|
[Fiddle::TYPE_VOIDP],
|
131
132
|
Fiddle::TYPE_INT
|
132
133
|
)
|
133
134
|
NLPIR_NWI_Complete_rb = Fiddle::Function.new(
|
134
|
-
libm['
|
135
|
+
libm['NLPIR_NWI_Complete'],
|
135
136
|
[],
|
136
137
|
Fiddle::TYPE_INT
|
137
138
|
)
|
138
139
|
NLPIR_NWI_GetResult_rb = Fiddle::Function.new(
|
139
|
-
libm['
|
140
|
+
libm['NLPIR_NWI_GetResult'],
|
140
141
|
[Fiddle::TYPE_INT],
|
141
142
|
Fiddle::TYPE_VOIDP
|
142
143
|
)
|
143
144
|
NLPIR_NWI_Result2UserDict_rb = Fiddle::Function.new(
|
144
|
-
libm['
|
145
|
+
libm['NLPIR_NWI_Result2UserDict'],
|
145
146
|
[],
|
146
147
|
Fiddle::TYPE_VOIDP
|
147
148
|
)
|
148
149
|
|
149
150
|
#--函数
|
150
151
|
|
151
|
-
def NLPIR_Init(sInitDirPath=nil , encoding=UTF8_CODE
|
152
|
-
|
153
|
-
if File.exist?(
|
154
|
-
FileUtils.mkdir(
|
152
|
+
def NLPIR_Init(sInitDirPath=nil , encoding=UTF8_CODE)
|
153
|
+
sInitDirPath += "/Data/"
|
154
|
+
if File.exist?(sInitDirPath)==false
|
155
|
+
FileUtils.mkdir(sInitDirPath)
|
155
156
|
filemother = File.expand_path("../Data/", __FILE__)
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
NLPIR_Init_rb.call(sInitDirPath,encoding)
|
165
|
-
|
157
|
+
FileUtils.copy_entry filemother,sInitDirPath
|
158
|
+
end
|
159
|
+
@charset = 'gbk' if encoding == GBK_CODE
|
160
|
+
@charset = 'utf-8' if encoding == UTF8_CODE
|
161
|
+
@charset = 'big5' if encoding == BIG5_CODE
|
162
|
+
@charset = 'gbk' if encoding == GBK_FANTI_CODE
|
163
|
+
NLPIR_Init_rb.call(nil,encoding)
|
166
164
|
end
|
165
|
+
alias :nlpir_init :NLPIR_Init
|
167
166
|
|
168
167
|
def NLPIR_Exit()
|
169
168
|
NLPIR_Exit_rb.call()
|
170
169
|
end
|
170
|
+
alias :nlpir_exit :NLPIR_Exit
|
171
171
|
|
172
172
|
def NLPIR_ImportUserDict(sFilename)
|
173
173
|
NLPIR_ImportUserDict_rb.call(sFilename)
|
174
174
|
end
|
175
|
+
alias :import_userdict :NLPIR_ImportUserDict
|
175
176
|
|
176
177
|
def NLPIR_ParagraphProcess(sParagraph, bPOStagged=NLPIR_TRUE)
|
177
|
-
NLPIR_ParagraphProcess_rb.call(sParagraph, bPOStagged).to_s
|
178
|
+
NLPIR_ParagraphProcess_rb.call(sParagraph, bPOStagged).to_s.force_encoding(@charset)
|
178
179
|
end
|
180
|
+
alias :text_proc :NLPIR_ParagraphProcess
|
179
181
|
|
180
182
|
def NLPIR_ParagraphProcessA(sParagraph)
|
181
183
|
resultCount = NLPIR_GetParagraphProcessAWordCount(sParagraph)
|
@@ -189,86 +191,109 @@ module Nlpir
|
|
189
191
|
end
|
190
192
|
return words_list
|
191
193
|
end
|
194
|
+
alias :text_procA :NLPIR_ParagraphProcessA
|
192
195
|
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
+
def NLPIR_GetParagraphProcessAWordCount(sParagraph)
|
197
|
+
NLPIR_GetParagraphProcessAWordCount_rb.call(sParagraph)
|
198
|
+
end
|
199
|
+
alias :text_wordcount :NLPIR_GetParagraphProcessAWordCount
|
196
200
|
|
197
|
-
|
198
|
-
|
199
|
-
|
201
|
+
def NLPIR_FileProcess(sSourceFilename, sResultFilename, bPOStagged=NLPIR_TRUE)
|
202
|
+
NLPIR_FileProcess_rb.call(sSourceFilename, sResultFilename, bPOStagged)
|
203
|
+
end
|
204
|
+
alias :file_proc :NLPIR_FileProcess
|
200
205
|
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
return words_list
|
206
|
+
|
207
|
+
def NLPIR_ParagraphProcessAW(sParagraph)
|
208
|
+
free = Fiddle::Function.new(Fiddle::RUBY_FREE, [TYPE_VOIDP], TYPE_VOID)
|
209
|
+
resultCount = NLPIR_GetParagraphProcessAWordCount(sParagraph)
|
210
|
+
pVecResult = Pointer.malloc(Result_t.size*resultCount,free)
|
211
|
+
NLPIR_ParagraphProcessAW_rb.call(resultCount,pVecResult)
|
212
|
+
words_list = []
|
213
|
+
words_list << Result_t.new(pVecResult)
|
214
|
+
for i in 1...resultCount do
|
215
|
+
words_list << Result_t.new(pVecResult+=Result_t.size)
|
212
216
|
end
|
217
|
+
return words_list
|
218
|
+
end
|
219
|
+
alias :text_procAW :NLPIR_ParagraphProcessAW
|
213
220
|
|
214
|
-
def NLPIR_AddUserWord(sWord)
|
215
|
-
NLPIR_AddUserWord_rb.call(sWord)
|
216
|
-
end
|
217
221
|
|
218
|
-
|
219
|
-
|
220
|
-
|
222
|
+
def NLPIR_AddUserWord(sWord)
|
223
|
+
NLPIR_AddUserWord_rb.call(sWord)
|
224
|
+
end
|
225
|
+
alias :add_userword :NLPIR_AddUserWord
|
221
226
|
|
222
|
-
|
223
|
-
|
224
|
-
|
227
|
+
def NLPIR_SaveTheUsrDic()
|
228
|
+
NLPIR_SaveTheUsrDic_rb.call()
|
229
|
+
end
|
230
|
+
alias :save_userdict :NLPIR_SaveTheUsrDic
|
225
231
|
|
226
|
-
|
227
|
-
|
228
|
-
|
232
|
+
def NLPIR_DelUsrWord(sWord)
|
233
|
+
NLPIR_DelUsrWord_rb.call(sWord)
|
234
|
+
end
|
235
|
+
alias :del_userword :NLPIR_DelUsrWord
|
229
236
|
|
230
|
-
|
231
|
-
|
232
|
-
|
237
|
+
def NLPIR_GetKeyWords(sLine, nMaxKeyLimit=50, bWeightOut=NLPIR_FALSE)
|
238
|
+
NLPIR_GetKeyWords_rb.call(sLine, nMaxKeyLimit, bWeightOut).to_s.force_encoding(@charset)
|
239
|
+
end
|
240
|
+
alias :text_keywords :NLPIR_GetKeyWords
|
233
241
|
|
234
|
-
|
235
|
-
|
236
|
-
|
242
|
+
def NLPIR_GetFileKeyWords(sTextFile, nMaxKeyLimit=50, bWeightOut=NLPIR_FALSE)
|
243
|
+
line = NLPIR_GetFileKeyWords_rb.call(sTextFile, nMaxKeyLimit, bWeightOut).to_s
|
244
|
+
line.force_encoding('gbk')
|
245
|
+
line.encode!(@charset)
|
246
|
+
end
|
247
|
+
alias :file_keywords :NLPIR_GetFileKeyWords
|
237
248
|
|
238
|
-
|
239
|
-
|
240
|
-
|
249
|
+
def NLPIR_GetNewWords(sLine, nMaxKeyLimit=50, bWeightOut=NLPIR_FALSE)
|
250
|
+
NLPIR_GetNewWords_rb.call(sLine, nMaxKeyLimit, bWeightOut).to_s.force_encoding(@charset)
|
251
|
+
end
|
252
|
+
alias :text_newwords :NLPIR_GetNewWords
|
241
253
|
|
242
|
-
|
243
|
-
|
244
|
-
|
254
|
+
def NLPIR_GetFileNewWords(sTextFile, nMaxKeyLimit=50, bWeightOut=NLPIR_FALSE)
|
255
|
+
NLPIR_GetFileNewWords_rb.call(sTextFile, nMaxKeyLimit, bWeightOut).to_s.force_encoding(@charset)
|
256
|
+
end
|
257
|
+
alias :file_newwords :NLPIR_GetFileNewWords
|
245
258
|
|
246
|
-
|
247
|
-
|
248
|
-
|
259
|
+
def NLPIR_FingerPrint(sLine)
|
260
|
+
NLPIR_FingerPrint_rb.call(sLine)
|
261
|
+
end
|
262
|
+
alias :text_fingerprint :NLPIR_FingerPrint
|
249
263
|
|
250
|
-
|
251
|
-
|
252
|
-
|
264
|
+
def NLPIR_SetPOSmap(nPOSmap)
|
265
|
+
NLPIR_SetPOSmap_rb.call(nPOSmap)
|
266
|
+
end
|
267
|
+
alias :setPOSmap :NLPIR_SetPOSmap
|
253
268
|
|
254
|
-
|
255
|
-
|
256
|
-
|
269
|
+
def NLPIR_NWI_Start()
|
270
|
+
NLPIR_NWI_Start_rb.call()
|
271
|
+
end
|
272
|
+
alias :NWI_start :NLPIR_NWI_Start
|
257
273
|
|
258
|
-
|
259
|
-
|
260
|
-
|
274
|
+
def NLPIR_NWI_AddFile(sFilename)
|
275
|
+
NLPIR_NWI_AddFile_rb.call(sFilename)
|
276
|
+
end
|
277
|
+
alias :NWI_addfile :NLPIR_NWI_AddFile
|
261
278
|
|
262
|
-
|
263
|
-
|
264
|
-
|
279
|
+
def NLPIR_NWI_AddMem(sFilename)
|
280
|
+
NLPIR_NWI_AddMem_rb.call(sFilename)
|
281
|
+
end
|
282
|
+
alias :NWI_addmem :NLPIR_NWI_AddMem
|
265
283
|
|
266
|
-
|
267
|
-
|
268
|
-
|
284
|
+
def NLPIR_NWI_Complete()
|
285
|
+
NLPIR_NWI_Complete_rb.call()
|
286
|
+
end
|
287
|
+
alias :NWI_complete :NLPIR_NWI_Complete
|
269
288
|
|
270
|
-
|
271
|
-
|
272
|
-
|
289
|
+
def NLPIR_NWI_GetResult( bWeightOut = NLPIR_FALSE)
|
290
|
+
NLPIR_NWI_GetResult_rb.call(bWeightOut)
|
291
|
+
end
|
292
|
+
alias :NWI_result :NLPIR_NWI_GetResult
|
293
|
+
|
294
|
+
def NLPIR_NWI_Result2UserDict()
|
295
|
+
NLPIR_NWI_Result2UserDict_rb.call()
|
296
|
+
end
|
297
|
+
alias :NWI_result2userdict :NLPIR_NWI_Result2UserDict
|
273
298
|
|
274
299
|
end
|