nlpir 0.0.3-x86-linux
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/LICENSE.txt +22 -0
- data/README.md +197 -0
- data/Rakefile +11 -0
- data/bin/lib.txt +44 -0
- data/bin/libNLPIR.so +0 -0
- data/lib/Data/BIG2GBK.map +0 -0
- data/lib/Data/BIG5.pdat +0 -0
- data/lib/Data/BIG5.wordlist +0 -0
- data/lib/Data/BiWord.big +0 -0
- data/lib/Data/Configure.xml +15 -0
- data/lib/Data/CoreDict.pdat +0 -0
- data/lib/Data/CoreDict.pos +0 -0
- data/lib/Data/CoreDict.unig +0 -0
- data/lib/Data/FieldDict.pdat +0 -0
- data/lib/Data/FieldDict.pos +0 -0
- data/lib/Data/GBK.pdat +0 -0
- data/lib/Data/GBK.wordlist +0 -0
- data/lib/Data/GBK2BIG.map +0 -0
- data/lib/Data/GBK2GBKC.map +0 -0
- data/lib/Data/GBK2UTF.map +0 -0
- data/lib/Data/GBKA.pdat +0 -0
- data/lib/Data/GBKA.wordlist +0 -0
- data/lib/Data/GBKA2UTF.map +0 -0
- data/lib/Data/GBKC.pdat +0 -0
- data/lib/Data/GBKC.wordlist +0 -0
- data/lib/Data/GBKC2GBK.map +0 -0
- data/lib/Data/GranDict.pdat +3369 -8
- data/lib/Data/GranDict.pos +0 -0
- data/lib/Data/ICTPOS.map +96 -0
- data/lib/Data/NLPIR.ctx +0 -0
- data/lib/Data/NLPIR.user +0 -0
- data/lib/Data/NLPIR_First.map +96 -0
- data/lib/Data/NewWord.lst +25 -0
- data/lib/Data/PKU.map +96 -0
- data/lib/Data/PKU_First.map +96 -0
- data/lib/Data/UTF2GBK.map +0 -0
- data/lib/Data/UTF2GBKA.map +0 -0
- data/lib/Data/UTF8.pdat +0 -0
- data/lib/Data/UTF8.wordlist +0 -0
- data/lib/Data/UserDict.pdat +0 -0
- data/lib/Data/charset.type +0 -0
- data/lib/Data/nr.ctx +0 -0
- data/lib/Data/nr.fsa +0 -0
- data/lib/Data/nr.role +0 -0
- data/lib/nlpir.rb +274 -0
- data/lib/nlpir/version.rb +3 -0
- data/nlpir.gemspec +24 -0
- data/test/Data/BIG2GBK.map +0 -0
- data/test/Data/BIG5.pdat +0 -0
- data/test/Data/BIG5.wordlist +0 -0
- data/test/Data/BiWord.big +0 -0
- data/test/Data/Configure.xml +15 -0
- data/test/Data/CoreDict.pdat +0 -0
- data/test/Data/CoreDict.pos +0 -0
- data/test/Data/CoreDict.unig +0 -0
- data/test/Data/FieldDict.pdat +0 -0
- data/test/Data/FieldDict.pos +0 -0
- data/test/Data/GBK.pdat +0 -0
- data/test/Data/GBK.wordlist +0 -0
- data/test/Data/GBK2BIG.map +0 -0
- data/test/Data/GBK2GBKC.map +0 -0
- data/test/Data/GBK2UTF.map +0 -0
- data/test/Data/GBKA.pdat +0 -0
- data/test/Data/GBKA.wordlist +0 -0
- data/test/Data/GBKA2UTF.map +0 -0
- data/test/Data/GBKC.pdat +0 -0
- data/test/Data/GBKC.wordlist +0 -0
- data/test/Data/GBKC2GBK.map +0 -0
- data/test/Data/GranDict.pdat +3369 -8
- data/test/Data/GranDict.pos +0 -0
- data/test/Data/ICTPOS.map +96 -0
- data/test/Data/NLPIR.ctx +0 -0
- data/test/Data/NLPIR.user +0 -0
- data/test/Data/NLPIR_First.map +96 -0
- data/test/Data/NewWord.lst +41 -0
- data/test/Data/PKU.map +96 -0
- data/test/Data/PKU_First.map +96 -0
- data/test/Data/UTF2GBK.map +0 -0
- data/test/Data/UTF2GBKA.map +0 -0
- data/test/Data/UTF8.pdat +0 -0
- data/test/Data/UTF8.wordlist +0 -0
- data/test/Data/UserDict.pdat +0 -0
- data/test/Data/charset.type +0 -0
- data/test/Data/nr.ctx +0 -0
- data/test/Data/nr.fsa +0 -0
- data/test/Data/nr.role +0 -0
- data/test/test.txt +52 -0
- data/test/test_nlpir.rb +148 -0
- data/test/test_result.txt +87 -0
- data/test/userdict.txt +5 -0
- metadata +208 -0
Binary file
|
data/lib/Data/ICTPOS.map
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
1
|
2
|
+
4
|
3
|
+
a
|
4
|
+
ad
|
5
|
+
ag
|
6
|
+
al
|
7
|
+
an
|
8
|
+
b
|
9
|
+
bl
|
10
|
+
c
|
11
|
+
cc
|
12
|
+
d
|
13
|
+
dg
|
14
|
+
dl
|
15
|
+
e
|
16
|
+
f
|
17
|
+
h
|
18
|
+
k
|
19
|
+
m
|
20
|
+
Mg
|
21
|
+
mq
|
22
|
+
n
|
23
|
+
ng
|
24
|
+
nl
|
25
|
+
nr
|
26
|
+
nr1
|
27
|
+
nr2
|
28
|
+
nrf
|
29
|
+
nrj
|
30
|
+
ns
|
31
|
+
nsf
|
32
|
+
nt
|
33
|
+
nz
|
34
|
+
o
|
35
|
+
p
|
36
|
+
pba
|
37
|
+
pbei
|
38
|
+
q
|
39
|
+
qt
|
40
|
+
qv
|
41
|
+
r
|
42
|
+
Rg
|
43
|
+
rr
|
44
|
+
ry
|
45
|
+
rys
|
46
|
+
ryt
|
47
|
+
ryv
|
48
|
+
rz
|
49
|
+
rzs
|
50
|
+
rzt
|
51
|
+
rzv
|
52
|
+
s
|
53
|
+
t
|
54
|
+
tg
|
55
|
+
u
|
56
|
+
ude1
|
57
|
+
ude2
|
58
|
+
ude3
|
59
|
+
udeng
|
60
|
+
udh
|
61
|
+
uguo
|
62
|
+
ule
|
63
|
+
ulian
|
64
|
+
uls
|
65
|
+
usuo
|
66
|
+
uyy
|
67
|
+
uzhe
|
68
|
+
uzhi
|
69
|
+
v
|
70
|
+
vd
|
71
|
+
vf
|
72
|
+
vg
|
73
|
+
vi
|
74
|
+
vl
|
75
|
+
vn
|
76
|
+
vshi
|
77
|
+
vx
|
78
|
+
vyou
|
79
|
+
w
|
80
|
+
wb
|
81
|
+
wd
|
82
|
+
wf
|
83
|
+
wj
|
84
|
+
wky
|
85
|
+
wkz
|
86
|
+
wm
|
87
|
+
wn
|
88
|
+
wp
|
89
|
+
ws
|
90
|
+
wt
|
91
|
+
ww
|
92
|
+
wyy
|
93
|
+
wyz
|
94
|
+
x
|
95
|
+
y
|
96
|
+
z
|
data/lib/Data/NLPIR.ctx
ADDED
Binary file
|
data/lib/Data/NLPIR.user
ADDED
Binary file
|
@@ -0,0 +1,96 @@
|
|
1
|
+
1
|
2
|
+
4
|
3
|
+
a
|
4
|
+
a
|
5
|
+
a
|
6
|
+
a
|
7
|
+
a
|
8
|
+
b
|
9
|
+
b
|
10
|
+
c
|
11
|
+
c
|
12
|
+
d
|
13
|
+
d
|
14
|
+
d
|
15
|
+
e
|
16
|
+
f
|
17
|
+
h
|
18
|
+
k
|
19
|
+
m
|
20
|
+
m
|
21
|
+
m
|
22
|
+
n
|
23
|
+
n
|
24
|
+
n
|
25
|
+
n
|
26
|
+
n
|
27
|
+
n
|
28
|
+
n
|
29
|
+
n
|
30
|
+
n
|
31
|
+
n
|
32
|
+
n
|
33
|
+
n
|
34
|
+
o
|
35
|
+
p
|
36
|
+
p
|
37
|
+
p
|
38
|
+
q
|
39
|
+
q
|
40
|
+
q
|
41
|
+
r
|
42
|
+
R
|
43
|
+
r
|
44
|
+
r
|
45
|
+
r
|
46
|
+
r
|
47
|
+
r
|
48
|
+
r
|
49
|
+
r
|
50
|
+
r
|
51
|
+
r
|
52
|
+
s
|
53
|
+
t
|
54
|
+
t
|
55
|
+
u
|
56
|
+
u
|
57
|
+
u
|
58
|
+
u
|
59
|
+
u
|
60
|
+
u
|
61
|
+
u
|
62
|
+
u
|
63
|
+
u
|
64
|
+
u
|
65
|
+
u
|
66
|
+
u
|
67
|
+
u
|
68
|
+
u
|
69
|
+
v
|
70
|
+
v
|
71
|
+
v
|
72
|
+
v
|
73
|
+
v
|
74
|
+
v
|
75
|
+
v
|
76
|
+
v
|
77
|
+
v
|
78
|
+
v
|
79
|
+
w
|
80
|
+
w
|
81
|
+
w
|
82
|
+
w
|
83
|
+
w
|
84
|
+
w
|
85
|
+
w
|
86
|
+
w
|
87
|
+
w
|
88
|
+
w
|
89
|
+
w
|
90
|
+
w
|
91
|
+
w
|
92
|
+
w
|
93
|
+
w
|
94
|
+
x
|
95
|
+
y
|
96
|
+
z
|
data/lib/Data/PKU.map
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
1
|
2
|
+
4
|
3
|
+
a
|
4
|
+
ad
|
5
|
+
g
|
6
|
+
l
|
7
|
+
an
|
8
|
+
b
|
9
|
+
l
|
10
|
+
c
|
11
|
+
cc
|
12
|
+
d
|
13
|
+
g
|
14
|
+
l
|
15
|
+
e
|
16
|
+
f
|
17
|
+
h
|
18
|
+
k
|
19
|
+
m
|
20
|
+
Mg
|
21
|
+
mq
|
22
|
+
n
|
23
|
+
g
|
24
|
+
l
|
25
|
+
nr
|
26
|
+
nr
|
27
|
+
nr
|
28
|
+
nr
|
29
|
+
nr
|
30
|
+
ns
|
31
|
+
ns
|
32
|
+
nt
|
33
|
+
nz
|
34
|
+
o
|
35
|
+
p
|
36
|
+
p
|
37
|
+
p
|
38
|
+
q
|
39
|
+
qt
|
40
|
+
qv
|
41
|
+
r
|
42
|
+
Rg
|
43
|
+
r
|
44
|
+
r
|
45
|
+
r
|
46
|
+
r
|
47
|
+
r
|
48
|
+
r
|
49
|
+
r
|
50
|
+
r
|
51
|
+
r
|
52
|
+
s
|
53
|
+
t
|
54
|
+
tg
|
55
|
+
u
|
56
|
+
u
|
57
|
+
u
|
58
|
+
u
|
59
|
+
u
|
60
|
+
u
|
61
|
+
u
|
62
|
+
u
|
63
|
+
u
|
64
|
+
u
|
65
|
+
u
|
66
|
+
u
|
67
|
+
u
|
68
|
+
u
|
69
|
+
v
|
70
|
+
v
|
71
|
+
v
|
72
|
+
g
|
73
|
+
v
|
74
|
+
v
|
75
|
+
vn
|
76
|
+
v
|
77
|
+
v
|
78
|
+
v
|
79
|
+
w
|
80
|
+
w
|
81
|
+
w
|
82
|
+
w
|
83
|
+
w
|
84
|
+
w
|
85
|
+
w
|
86
|
+
w
|
87
|
+
w
|
88
|
+
w
|
89
|
+
w
|
90
|
+
w
|
91
|
+
w
|
92
|
+
w
|
93
|
+
w
|
94
|
+
x
|
95
|
+
y
|
96
|
+
z
|
@@ -0,0 +1,96 @@
|
|
1
|
+
1
|
2
|
+
4
|
3
|
+
a
|
4
|
+
a
|
5
|
+
g
|
6
|
+
l
|
7
|
+
a
|
8
|
+
b
|
9
|
+
l
|
10
|
+
c
|
11
|
+
c
|
12
|
+
d
|
13
|
+
g
|
14
|
+
l
|
15
|
+
e
|
16
|
+
f
|
17
|
+
h
|
18
|
+
k
|
19
|
+
m
|
20
|
+
m
|
21
|
+
m
|
22
|
+
n
|
23
|
+
g
|
24
|
+
l
|
25
|
+
n
|
26
|
+
n
|
27
|
+
n
|
28
|
+
n
|
29
|
+
n
|
30
|
+
n
|
31
|
+
n
|
32
|
+
n
|
33
|
+
n
|
34
|
+
o
|
35
|
+
p
|
36
|
+
p
|
37
|
+
p
|
38
|
+
q
|
39
|
+
q
|
40
|
+
q
|
41
|
+
r
|
42
|
+
R
|
43
|
+
r
|
44
|
+
r
|
45
|
+
r
|
46
|
+
r
|
47
|
+
r
|
48
|
+
r
|
49
|
+
r
|
50
|
+
r
|
51
|
+
r
|
52
|
+
s
|
53
|
+
t
|
54
|
+
t
|
55
|
+
u
|
56
|
+
u
|
57
|
+
u
|
58
|
+
u
|
59
|
+
u
|
60
|
+
u
|
61
|
+
u
|
62
|
+
u
|
63
|
+
u
|
64
|
+
u
|
65
|
+
u
|
66
|
+
u
|
67
|
+
u
|
68
|
+
u
|
69
|
+
v
|
70
|
+
v
|
71
|
+
v
|
72
|
+
g
|
73
|
+
v
|
74
|
+
v
|
75
|
+
v
|
76
|
+
v
|
77
|
+
v
|
78
|
+
v
|
79
|
+
w
|
80
|
+
w
|
81
|
+
w
|
82
|
+
w
|
83
|
+
w
|
84
|
+
w
|
85
|
+
w
|
86
|
+
w
|
87
|
+
w
|
88
|
+
w
|
89
|
+
w
|
90
|
+
w
|
91
|
+
w
|
92
|
+
w
|
93
|
+
w
|
94
|
+
x
|
95
|
+
y
|
96
|
+
z
|
Binary file
|
Binary file
|
data/lib/Data/UTF8.pdat
ADDED
Binary file
|
Binary file
|
Binary file
|
Binary file
|
data/lib/Data/nr.ctx
ADDED
Binary file
|
data/lib/Data/nr.fsa
ADDED
Binary file
|
data/lib/Data/nr.role
ADDED
Binary file
|
data/lib/nlpir.rb
ADDED
@@ -0,0 +1,274 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
require File.expand_path("../nlpir/version", __FILE__)
|
3
|
+
require 'fiddle'
|
4
|
+
require 'fiddle/struct'
|
5
|
+
require 'fiddle/import'
|
6
|
+
require 'fileutils'
|
7
|
+
include Fiddle::CParser
|
8
|
+
include Fiddle::Importer
|
9
|
+
|
10
|
+
# Thin Fiddle binding around the bundled NLPIR shared library
# (bin/libNLPIR.so). The methods are plain instance methods, so callers are
# expected to `include Nlpir` before using them.
module Nlpir
  # Integer stand-ins used wherever the native API takes a C++ `bool`.
  NLPIR_FALSE = 0
  NLPIR_TRUE = 1
  POS_MAP_NUMBER = 4
  ICT_POS_MAP_FIRST = 1   # ICT (Institute of Computing Technology) first-level tag set
  ICT_POS_MAP_SECOND = 0  # ICT second-level tag set
  PKU_POS_MAP_SECOND = 2  # PKU (Peking University) second-level tag set
  PKU_POS_MAP_FIRST = 3   # PKU first-level tag set
  POS_SIZE = 40

  # Ruby-side layout of one native result record.
  # NOTE(review): the layout must match the library's `result_t` exactly;
  # confirm the type of `weight` against the NLPIR header in use.
  Result_t = struct ['int start','int length',"char sPOS[#{POS_SIZE}]",'int iPOS',
                     'int word_ID','int word_type','double weight']

  GBK_CODE = 0                   # GBK encoding (the default)
  UTF8_CODE = GBK_CODE + 1       # UTF-8 encoding
  BIG5_CODE = GBK_CODE + 2       # BIG5 encoding
  GBK_FANTI_CODE = GBK_CODE + 3  # GBK encoding that also contains traditional characters

  # Bind the entry points of the shared library. Symbols are looked up by
  # their mangled C++ names, so each argument list below has to agree with
  # the parameter types encoded in the symbol.
  nlpir_lib = Fiddle.dlopen(File.expand_path("../../bin/libNLPIR.so", __FILE__))

  NLPIR_Init_rb = Fiddle::Function.new(
    nlpir_lib['_Z10NLPIR_InitPKci'],
    [Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT],
    Fiddle::TYPE_INT
  )
  NLPIR_Exit_rb = Fiddle::Function.new(
    nlpir_lib['_Z10NLPIR_Exitv'],
    [],
    Fiddle::TYPE_INT
  )
  NLPIR_ImportUserDict_rb = Fiddle::Function.new(
    nlpir_lib['_Z20NLPIR_ImportUserDictPKc'],
    [Fiddle::TYPE_VOIDP],
    Fiddle::TYPE_INT
  )
  NLPIR_ParagraphProcess_rb = Fiddle::Function.new(
    nlpir_lib['_Z22NLPIR_ParagraphProcessPKci'],
    [Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT],
    Fiddle::TYPE_VOIDP
  )
  # The symbol encodes (const char*, int*, bool); the trailing bool is passed
  # as an int, the same way the other bool parameters in this file are bound.
  NLPIR_ParagraphProcessA_rb = Fiddle::Function.new(
    nlpir_lib['_Z23NLPIR_ParagraphProcessAPKcPib'],
    [Fiddle::TYPE_VOIDP,Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT],
    Fiddle::TYPE_VOIDP
  )
  NLPIR_FileProcess_rb = Fiddle::Function.new(
    nlpir_lib['_Z17NLPIR_FileProcessPKcS0_i'],
    [Fiddle::TYPE_VOIDP,Fiddle::TYPE_VOIDP, Fiddle::TYPE_INT],
    Fiddle::TYPE_DOUBLE
  )
  NLPIR_GetParagraphProcessAWordCount_rb = Fiddle::Function.new(
    nlpir_lib['_Z35NLPIR_GetParagraphProcessAWordCountPKc'],
    [Fiddle::TYPE_VOIDP],
    Fiddle::TYPE_INT
  )
  NLPIR_ParagraphProcessAW_rb = Fiddle::Function.new(
    nlpir_lib['_Z24NLPIR_ParagraphProcessAWiP8result_t'],
    [Fiddle::TYPE_INT,Fiddle::TYPE_VOIDP],
    Fiddle::TYPE_INT
  )
  NLPIR_AddUserWord_rb = Fiddle::Function.new(
    nlpir_lib['_Z17NLPIR_AddUserWordPKc'],
    [Fiddle::TYPE_VOIDP],
    Fiddle::TYPE_INT
  )
  NLPIR_SaveTheUsrDic_rb = Fiddle::Function.new(
    nlpir_lib['_Z19NLPIR_SaveTheUsrDicv'],
    [],
    Fiddle::TYPE_INT
  )
  NLPIR_DelUsrWord_rb = Fiddle::Function.new(
    nlpir_lib['_Z16NLPIR_DelUsrWordPKc'],
    [Fiddle::TYPE_VOIDP],
    Fiddle::TYPE_INT
  )
  NLPIR_GetKeyWords_rb = Fiddle::Function.new(
    nlpir_lib['_Z17NLPIR_GetKeyWordsPKcib'],
    [Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT,Fiddle::TYPE_INT],
    Fiddle::TYPE_VOIDP
  )
  NLPIR_GetFileKeyWords_rb = Fiddle::Function.new(
    nlpir_lib['_Z21NLPIR_GetFileKeyWordsPKcib'],
    [Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT,Fiddle::TYPE_INT],
    Fiddle::TYPE_VOIDP
  )
  NLPIR_GetNewWords_rb = Fiddle::Function.new(
    nlpir_lib['_Z17NLPIR_GetNewWordsPKcib'],
    [Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT,Fiddle::TYPE_INT],
    Fiddle::TYPE_VOIDP
  )
  NLPIR_GetFileNewWords_rb = Fiddle::Function.new(
    nlpir_lib['_Z21NLPIR_GetFileNewWordsPKcib'],
    [Fiddle::TYPE_VOIDP,Fiddle::TYPE_INT,Fiddle::TYPE_INT],
    Fiddle::TYPE_VOIDP
  )
  NLPIR_FingerPrint_rb = Fiddle::Function.new(
    nlpir_lib['_Z17NLPIR_FingerPrintPKc'],
    [Fiddle::TYPE_VOIDP],
    Fiddle::TYPE_LONG
  )
  NLPIR_SetPOSmap_rb = Fiddle::Function.new(
    nlpir_lib['_Z15NLPIR_SetPOSmapi'],
    [Fiddle::TYPE_INT],
    Fiddle::TYPE_INT
  )

  NLPIR_NWI_Start_rb = Fiddle::Function.new(
    nlpir_lib['_Z15NLPIR_NWI_Startv'],
    [],
    Fiddle::TYPE_INT
  )
  NLPIR_NWI_AddFile_rb = Fiddle::Function.new(
    nlpir_lib['_Z17NLPIR_NWI_AddFilePKc'],
    [Fiddle::TYPE_VOIDP],
    Fiddle::TYPE_INT
  )
  NLPIR_NWI_AddMem_rb = Fiddle::Function.new(
    nlpir_lib['_Z16NLPIR_NWI_AddMemPKc'],
    [Fiddle::TYPE_VOIDP],
    Fiddle::TYPE_INT
  )
  NLPIR_NWI_Complete_rb = Fiddle::Function.new(
    nlpir_lib['_Z18NLPIR_NWI_Completev'],
    [],
    Fiddle::TYPE_INT
  )
  NLPIR_NWI_GetResult_rb = Fiddle::Function.new(
    nlpir_lib['_Z19NLPIR_NWI_GetResultb'],
    [Fiddle::TYPE_INT],
    Fiddle::TYPE_VOIDP
  )
  # NOTE(review): bound as returning a pointer; the mangled name does not
  # encode the return type, so verify this against the NLPIR header.
  NLPIR_NWI_Result2UserDict_rb = Fiddle::Function.new(
    nlpir_lib['_Z25NLPIR_NWI_Result2UserDictv'],
    [],
    Fiddle::TYPE_VOIDP
  )

  # -- Functions

  # Makes sure "<filepath>/Data/" exists, seeding it with the data files
  # bundled next to this file when it has to be created, then calls the
  # native NLPIR_Init.
  #
  # sInitDirPath - directory handed to the native initializer (nil passes NULL).
  # encoding     - one of the *_CODE constants.
  # filepath     - directory under which the "Data" folder is created.
  #
  # Returns the Integer result of the native call.
  def NLPIR_Init(sInitDirPath=nil , encoding=UTF8_CODE, filepath)
    data_dir = "#{filepath}/Data/"
    unless File.exist?(data_dir)
      # mkdir_p so a missing parent directory does not abort initialization.
      FileUtils.mkdir_p(data_dir)
      bundled_dir = File.expand_path("../Data/", __FILE__)
      Dir.entries(bundled_dir).each do |entry|
        source = File.join(bundled_dir, entry)
        # Skips ".", ".." and any sub-directories; only plain files are copied.
        FileUtils.cp(source, data_dir) unless File.directory?(source)
      end
    end

    NLPIR_Init_rb.call(sInitDirPath, encoding)
  end

  # Calls the native NLPIR_Exit and returns its Integer result.
  def NLPIR_Exit()
    NLPIR_Exit_rb.call()
  end

  # Calls the native NLPIR_ImportUserDict with the given file name and
  # returns its Integer result.
  def NLPIR_ImportUserDict(sFilename)
    NLPIR_ImportUserDict_rb.call(sFilename)
  end

  # Calls the native NLPIR_ParagraphProcess and returns the C string it
  # yields as a Ruby String.
  #
  # bPOStagged - NLPIR_TRUE / NLPIR_FALSE, forwarded as an int.
  def NLPIR_ParagraphProcess(sParagraph, bPOStagged=NLPIR_TRUE)
    NLPIR_ParagraphProcess_rb.call(sParagraph, bPOStagged).to_s
  end

  # Calls the native NLPIR_ParagraphProcessA and wraps every record of the
  # returned array in a Result_t.
  #
  # sParagraph - text to process.
  # bUserDict  - value for the native function's trailing bool parameter.
  #
  # Returns an Array of Result_t (empty when the library reports no records).
  # The records point into memory that was not allocated here.
  # NOTE(review): presumably that memory stays valid only until the next
  # library call - confirm before holding on to the structs.
  def NLPIR_ParagraphProcessA(sParagraph, bUserDict=NLPIR_TRUE)
    # A real int-sized buffer for the `int*` parameter of the native function.
    count_buffer = Fiddle::Pointer.malloc(Fiddle::SIZEOF_INT, Fiddle::RUBY_FREE)
    count_buffer[0, Fiddle::SIZEOF_INT] = [0].pack('i')

    first_result = NLPIR_ParagraphProcessA_rb.call(sParagraph, count_buffer, bUserDict)
    result_count = count_buffer[0, Fiddle::SIZEOF_INT].unpack('i').first
    return [] if first_result.null? || result_count <= 0

    Array.new(result_count) do |index|
      Result_t.new(first_result + index * Result_t.size)
    end
  end

  # Calls the native NLPIR_FileProcess and returns its Float result.
  #
  # bPOStagged - NLPIR_TRUE / NLPIR_FALSE, forwarded as an int.
  def NLPIR_FileProcess(sSourceFilename, sResultFilename, bPOStagged=NLPIR_TRUE)
    NLPIR_FileProcess_rb.call(sSourceFilename, sResultFilename, bPOStagged)
  end

  # Calls the native NLPIR_GetParagraphProcessAWordCount and returns its
  # Integer result.
  def NLPIR_GetParagraphProcessAWordCount(sParagraph)
    NLPIR_GetParagraphProcessAWordCount_rb.call(sParagraph)
  end

  # Asks the library for the word count of sParagraph, allocates a buffer for
  # that many records, lets the native NLPIR_ParagraphProcessAW fill it and
  # wraps each record in a Result_t.
  #
  # Returns an Array of Result_t (empty when the word count is not positive).
  def NLPIR_ParagraphProcessAW(sParagraph)
    result_count = NLPIR_GetParagraphProcessAWordCount(sParagraph)
    return [] if result_count <= 0

    buffer = Fiddle::Pointer.malloc(Result_t.size * result_count, Fiddle::RUBY_FREE)
    NLPIR_ParagraphProcessAW_rb.call(result_count, buffer)

    Array.new(result_count) do |index|
      word = Result_t.new(buffer + index * Result_t.size)
      # The buffer is freed when its Pointer object is collected, so every
      # struct keeps a reference to it for as long as the struct is alive.
      word.instance_variable_set(:@nlpir_buffer, buffer)
      word
    end
  end

  # Calls the native NLPIR_AddUserWord and returns its Integer result.
  def NLPIR_AddUserWord(sWord)
    NLPIR_AddUserWord_rb.call(sWord)
  end

  # Calls the native NLPIR_SaveTheUsrDic and returns its Integer result.
  def NLPIR_SaveTheUsrDic()
    NLPIR_SaveTheUsrDic_rb.call()
  end

  # Calls the native NLPIR_DelUsrWord and returns its Integer result.
  def NLPIR_DelUsrWord(sWord)
    NLPIR_DelUsrWord_rb.call(sWord)
  end

  # Calls the native NLPIR_GetKeyWords and returns the resulting C string as
  # a Ruby String.
  def NLPIR_GetKeyWords(sLine, nMaxKeyLimit=50, bWeightOut=NLPIR_FALSE)
    NLPIR_GetKeyWords_rb.call(sLine, nMaxKeyLimit, bWeightOut).to_s
  end

  # Calls the native NLPIR_GetFileKeyWords and returns the resulting C string
  # as a Ruby String.
  def NLPIR_GetFileKeyWords(sTextFile, nMaxKeyLimit=50, bWeightOut=NLPIR_FALSE)
    NLPIR_GetFileKeyWords_rb.call(sTextFile, nMaxKeyLimit, bWeightOut).to_s
  end

  # Calls the native NLPIR_GetNewWords and returns the resulting C string as
  # a Ruby String.
  def NLPIR_GetNewWords(sLine, nMaxKeyLimit=50, bWeightOut=NLPIR_FALSE)
    NLPIR_GetNewWords_rb.call(sLine, nMaxKeyLimit, bWeightOut).to_s
  end

  # Calls the native NLPIR_GetFileNewWords and returns the resulting C string
  # as a Ruby String.
  def NLPIR_GetFileNewWords(sTextFile, nMaxKeyLimit=50, bWeightOut=NLPIR_FALSE)
    NLPIR_GetFileNewWords_rb.call(sTextFile, nMaxKeyLimit, bWeightOut).to_s
  end

  # Calls the native NLPIR_FingerPrint and returns its Integer result.
  def NLPIR_FingerPrint(sLine)
    NLPIR_FingerPrint_rb.call(sLine)
  end

  # Calls the native NLPIR_SetPOSmap with one of the *_POS_MAP_* constants
  # and returns its Integer result.
  def NLPIR_SetPOSmap(nPOSmap)
    NLPIR_SetPOSmap_rb.call(nPOSmap)
  end

  # Calls the native NLPIR_NWI_Start and returns its Integer result.
  def NLPIR_NWI_Start()
    NLPIR_NWI_Start_rb.call()
  end

  # Calls the native NLPIR_NWI_AddFile and returns its Integer result.
  def NLPIR_NWI_AddFile(sFilename)
    NLPIR_NWI_AddFile_rb.call(sFilename)
  end

  # Calls the native NLPIR_NWI_AddMem and returns its Integer result.
  def NLPIR_NWI_AddMem(sFilename)
    NLPIR_NWI_AddMem_rb.call(sFilename)
  end

  # Calls the native NLPIR_NWI_Complete and returns its Integer result.
  def NLPIR_NWI_Complete()
    NLPIR_NWI_Complete_rb.call()
  end

  # Calls the native NLPIR_NWI_GetResult. Unlike the keyword / new-word
  # helpers above, the raw Fiddle::Pointer is returned; call `to_s` on it to
  # obtain a Ruby String.
  def NLPIR_NWI_GetResult( bWeightOut = NLPIR_FALSE)
    NLPIR_NWI_GetResult_rb.call(bWeightOut)
  end

  # Calls the native NLPIR_NWI_Result2UserDict and returns whatever the
  # binding yields (currently a Fiddle::Pointer).
  def NLPIR_NWI_Result2UserDict()
    NLPIR_NWI_Result2UserDict_rb.call()
  end

end
|