nlpir 0.0.3-x86-linux → 0.1.0-x86-linux
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +22 -23
- data/lib/Data/FieldDict.pdat +0 -0
- data/lib/Data/FieldDict.pos +0 -0
- data/lib/Data/ICTPOS.map +96 -96
- data/lib/Data/NLPIR.user +0 -0
- data/lib/Data/NLPIR_First.map +96 -96
- data/lib/Data/NewWord.lst +2 -25
- data/lib/Data/PKU.map +96 -96
- data/lib/Data/PKU_First.map +96 -96
- data/lib/Data/UserDict.pdat +0 -0
- data/lib/nlpir/version.rb +1 -1
- data/test/Data/FieldDict.pdat +0 -0
- data/test/Data/FieldDict.pos +0 -0
- data/test/Data/ICTPOS.map +96 -96
- data/test/Data/NLPIR.user +0 -0
- data/test/Data/NLPIR_First.map +96 -96
- data/test/Data/NewWord.lst +2 -40
- data/test/Data/PKU.map +96 -96
- data/test/Data/PKU_First.map +96 -96
- data/test/test_nlpir.rb +5 -2
- data/test/userdict.txt +5 -5
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1b4618aa4720763f75ad927ed54c335c33811622
|
4
|
+
data.tar.gz: af4693b310257df825852afc4362e68cc5734347
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3b54252b4f738854a1896a04643612f66f07e22e0309fc19a6a13c680213b8b56a226a5f55a9bbe575854ca035c77239f432cfb031d1d72f367a543ae5b22a9a
|
7
|
+
data.tar.gz: 4d73ceefb3db7dba3ded885fa0e7dcdeadcdf2b78fb3932147ef389244af759f0d41988450df6b192fec63a4d4d4fa1b3af454ed975ce1b69bd72b8c39b6d5da
|
data/README.md
CHANGED
@@ -18,7 +18,7 @@ Or install it yourself as:
|
|
18
18
|
|
19
19
|
## Usage
|
20
20
|
|
21
|
-
Nlpir version 0.0.
|
21
|
+
Nlpir version 0.0.3 supports only *nix operating systems; we'll release it for the Windows platform in a few days.
|
22
22
|
|
23
23
|
some DEFINE you may use :
|
24
24
|
```ruby
|
@@ -27,9 +27,9 @@ some DEFINE you may use :
|
|
27
27
|
NLPIR_TRUE = 1
|
28
28
|
POS_MAP_NUMBER = 4
|
29
29
|
ICT_POS_MAP_FIRST = 1 #计算所一级标注集
|
30
|
-
ICT_POS_MAP_SECOND = 0
|
31
|
-
PKU_POS_MAP_SECOND = 2
|
32
|
-
PKU_POS_MAP_FIRST = 3
|
30
|
+
ICT_POS_MAP_SECOND = 0 #计算所二级标注集
|
31
|
+
PKU_POS_MAP_SECOND = 2 #北大二级标注集
|
32
|
+
PKU_POS_MAP_FIRST = 3 #北大一级标注集
|
33
33
|
POS_SIZE = 40
|
34
34
|
|
35
35
|
Result_t = struct ['int start','int length',"char sPOS[#{POS_SIZE}]",'int iPOS',
|
@@ -51,25 +51,14 @@ after you gem install it:
|
|
51
51
|
|
52
52
|
s = "坚定不移沿着中国特色社会主义道路前进 为全面建成小康社会而奋斗"
|
53
53
|
#first of all : Call the NLPIR API NLPIR_Init
|
54
|
-
|
55
|
-
|
56
|
-
end
|
57
|
-
|
58
|
-
#example1: Import user-defined dictionary from a text file. and puts NLPIR result
|
59
|
-
puts NLPIR_ParagraphProcess("1989年春夏之交的政治风波1989年政治风波24小时降雪量24小时降雨量863计划ABC防护训练APEC会议BB机BP机C2系统C3I系统C3系统C4ISR系统C4I系统CCITT建议")
|
60
|
-
puts NLPIR_ImportUserDict("./userdict.txt")
|
61
|
-
NLPIR_AddUserWord("1989年春夏之交的政治风波 n")
|
62
|
-
#you can see the example file: ./userdict.txt to know the userdict`s format requirements
|
63
|
-
puts NLPIR_ParagraphProcess("1989年春夏之交的政治风波1989年政治风波24小时降雪量24小时降雨量863计划ABC防护训练APEC会议BB机BP机C2系统C3I系统C3系统C4ISR系统C4I系统CCITT建议")
|
64
|
-
NLPIR_DelUsrWord("1989年春夏之交的政治风波")
|
65
|
-
puts NLPIR_ParagraphProcess("1989年春夏之交的政治风波1989年政治风波24小时降雪量24小时降雨量863计划ABC防护训练APEC会议BB机BP机C2系统C3I系统C3系统C4ISR系统C4I系统CCITT建议")
|
66
|
-
puts "\n"
|
54
|
+
|
55
|
+
NLPIR_Init(nil, UTF8_CODE , File.expand_path("../", __FILE__))
|
67
56
|
|
68
|
-
#
|
57
|
+
#example1: Process a paragraph, and return the result text with POS or not
|
69
58
|
puts NLPIR_ParagraphProcess(s, NLPIR_TRUE)
|
70
59
|
puts NLPIR_ParagraphProcess(s, NLPIR_FALSE)
|
71
60
|
|
72
|
-
#
|
61
|
+
#example2: Process a paragraph, and return an array whose elements are POS-tagged words.
|
73
62
|
#tips: NLPIR_ParagraphProcessA() returns the array; its memory is malloc'ed by NLPIR and will be freed by NLPIR_Exit() (memory in server)
|
74
63
|
|
75
64
|
words_list = NLPIR_ParagraphProcessA(s)
|
@@ -88,7 +77,7 @@ after you gem install it:
|
|
88
77
|
i += 1
|
89
78
|
end
|
90
79
|
|
91
|
-
#
|
80
|
+
#example3: Process a paragraph, and return an array whose elements are POS-tagged words.
|
92
81
|
#tips: NLPIR_ParagraphProcessAW() returns the array; its memory is allocated by ruby::fiddle and is collected by the GC (memory in agent)
|
93
82
|
|
94
83
|
words_list = NLPIR_ParagraphProcessAW(s)
|
@@ -107,15 +96,15 @@ after you gem install it:
|
|
107
96
|
i += 1
|
108
97
|
end
|
109
98
|
|
110
|
-
#
|
99
|
+
#example4: Process a text file, and write the result text to a file
|
111
100
|
puts NLPIR_FileProcess("./test.txt", "./test_result.txt", NULL)
|
112
101
|
|
113
102
|
|
114
|
-
#
|
103
|
+
#example5: Get ProcessAWordCount, it returns the count of the words
|
115
104
|
puts count = NLPIR_GetParagraphProcessAWordCount(s)
|
116
105
|
|
117
106
|
|
118
|
-
#
|
107
|
+
#example6: Add a word to, or delete a word from, the user dictionary (the path of the user dictionary is ./data/userdict.pdat)
|
119
108
|
puts NLPIR_ParagraphProcess("我们都是爱思客")
|
120
109
|
#add a user word
|
121
110
|
NLPIR_AddUserWord("都是爱思客 n")
|
@@ -133,6 +122,16 @@ after you gem install it:
|
|
133
122
|
NLPIR_SaveTheUsrDic()
|
134
123
|
puts NLPIR_ParagraphProcess("我们都是爱思客")
|
135
124
|
|
125
|
+
|
126
|
+
#example7: Import a user-defined dictionary from a text file, and print the NLPIR result
|
127
|
+
puts NLPIR_ParagraphProcess("1989年春夏之交的政治风波1989年政治风波24小时降雪量24小时降雨量863计划ABC防护训练APEC会议BB机BP机C2系统C3I系统C3系统C4ISR系统C4I系统CCITT建议")
|
128
|
+
puts NLPIR_ImportUserDict("./userdict.txt")
|
129
|
+
NLPIR_AddUserWord("1989年春夏之交的政治风波 n")
|
130
|
+
#you can see the example file: ./userdict.txt to know the userdict`s format requirements
|
131
|
+
puts NLPIR_ParagraphProcess("1989年春夏之交的政治风波1989年政治风波24小时降雪量24小时降雨量863计划ABC防护训练APEC会议BB机BP机C2系统C3I系统C3系统C4ISR系统C4I系统CCITT建议")
|
132
|
+
NLPIR_DelUsrWord("1989年春夏之交的政治风波")
|
133
|
+
puts NLPIR_ParagraphProcess("1989年春夏之交的政治风波1989年政治风波24小时降雪量24小时降雨量863计划ABC防护训练APEC会议BB机BP机C2系统C3I系统C3系统C4ISR系统C4I系统CCITT建议")
|
134
|
+
puts "\n"
|
136
135
|
|
137
136
|
#example8: Get keywords of text
|
138
137
|
#2nd parameter is the MaxNumber of keywords
|
data/lib/Data/FieldDict.pdat
CHANGED
Binary file
|
data/lib/Data/FieldDict.pos
CHANGED
Binary file
|
data/lib/Data/ICTPOS.map
CHANGED
@@ -1,96 +1,96 @@
|
|
1
|
-
1
|
2
|
-
4
|
3
|
-
a
|
4
|
-
ad
|
5
|
-
ag
|
6
|
-
al
|
7
|
-
an
|
8
|
-
b
|
9
|
-
bl
|
10
|
-
c
|
11
|
-
cc
|
12
|
-
d
|
13
|
-
dg
|
14
|
-
dl
|
15
|
-
e
|
16
|
-
f
|
17
|
-
h
|
18
|
-
k
|
19
|
-
m
|
20
|
-
Mg
|
21
|
-
mq
|
22
|
-
n
|
23
|
-
ng
|
24
|
-
nl
|
25
|
-
nr
|
26
|
-
nr1
|
27
|
-
nr2
|
28
|
-
nrf
|
29
|
-
nrj
|
30
|
-
ns
|
31
|
-
nsf
|
32
|
-
nt
|
33
|
-
nz
|
34
|
-
o
|
35
|
-
p
|
36
|
-
pba
|
37
|
-
pbei
|
38
|
-
q
|
39
|
-
qt
|
40
|
-
qv
|
41
|
-
r
|
42
|
-
Rg
|
43
|
-
rr
|
44
|
-
ry
|
45
|
-
rys
|
46
|
-
ryt
|
47
|
-
ryv
|
48
|
-
rz
|
49
|
-
rzs
|
50
|
-
rzt
|
51
|
-
rzv
|
52
|
-
s
|
53
|
-
t
|
54
|
-
tg
|
55
|
-
u
|
56
|
-
ude1
|
57
|
-
ude2
|
58
|
-
ude3
|
59
|
-
udeng
|
60
|
-
udh
|
61
|
-
uguo
|
62
|
-
ule
|
63
|
-
ulian
|
64
|
-
uls
|
65
|
-
usuo
|
66
|
-
uyy
|
67
|
-
uzhe
|
68
|
-
uzhi
|
69
|
-
v
|
70
|
-
vd
|
71
|
-
vf
|
72
|
-
vg
|
73
|
-
vi
|
74
|
-
vl
|
75
|
-
vn
|
76
|
-
vshi
|
77
|
-
vx
|
78
|
-
vyou
|
79
|
-
w
|
80
|
-
wb
|
81
|
-
wd
|
82
|
-
wf
|
83
|
-
wj
|
84
|
-
wky
|
85
|
-
wkz
|
86
|
-
wm
|
87
|
-
wn
|
88
|
-
wp
|
89
|
-
ws
|
90
|
-
wt
|
91
|
-
ww
|
92
|
-
wyy
|
93
|
-
wyz
|
94
|
-
x
|
95
|
-
y
|
96
|
-
z
|
1
|
+
1
|
2
|
+
4
|
3
|
+
a
|
4
|
+
ad
|
5
|
+
ag
|
6
|
+
al
|
7
|
+
an
|
8
|
+
b
|
9
|
+
bl
|
10
|
+
c
|
11
|
+
cc
|
12
|
+
d
|
13
|
+
dg
|
14
|
+
dl
|
15
|
+
e
|
16
|
+
f
|
17
|
+
h
|
18
|
+
k
|
19
|
+
m
|
20
|
+
Mg
|
21
|
+
mq
|
22
|
+
n
|
23
|
+
ng
|
24
|
+
nl
|
25
|
+
nr
|
26
|
+
nr1
|
27
|
+
nr2
|
28
|
+
nrf
|
29
|
+
nrj
|
30
|
+
ns
|
31
|
+
nsf
|
32
|
+
nt
|
33
|
+
nz
|
34
|
+
o
|
35
|
+
p
|
36
|
+
pba
|
37
|
+
pbei
|
38
|
+
q
|
39
|
+
qt
|
40
|
+
qv
|
41
|
+
r
|
42
|
+
Rg
|
43
|
+
rr
|
44
|
+
ry
|
45
|
+
rys
|
46
|
+
ryt
|
47
|
+
ryv
|
48
|
+
rz
|
49
|
+
rzs
|
50
|
+
rzt
|
51
|
+
rzv
|
52
|
+
s
|
53
|
+
t
|
54
|
+
tg
|
55
|
+
u
|
56
|
+
ude1
|
57
|
+
ude2
|
58
|
+
ude3
|
59
|
+
udeng
|
60
|
+
udh
|
61
|
+
uguo
|
62
|
+
ule
|
63
|
+
ulian
|
64
|
+
uls
|
65
|
+
usuo
|
66
|
+
uyy
|
67
|
+
uzhe
|
68
|
+
uzhi
|
69
|
+
v
|
70
|
+
vd
|
71
|
+
vf
|
72
|
+
vg
|
73
|
+
vi
|
74
|
+
vl
|
75
|
+
vn
|
76
|
+
vshi
|
77
|
+
vx
|
78
|
+
vyou
|
79
|
+
w
|
80
|
+
wb
|
81
|
+
wd
|
82
|
+
wf
|
83
|
+
wj
|
84
|
+
wky
|
85
|
+
wkz
|
86
|
+
wm
|
87
|
+
wn
|
88
|
+
wp
|
89
|
+
ws
|
90
|
+
wt
|
91
|
+
ww
|
92
|
+
wyy
|
93
|
+
wyz
|
94
|
+
x
|
95
|
+
y
|
96
|
+
z
|
data/lib/Data/NLPIR.user
CHANGED
Binary file
|
data/lib/Data/NLPIR_First.map
CHANGED
@@ -1,96 +1,96 @@
|
|
1
|
-
1
|
2
|
-
4
|
3
|
-
a
|
4
|
-
a
|
5
|
-
a
|
6
|
-
a
|
7
|
-
a
|
8
|
-
b
|
9
|
-
b
|
10
|
-
c
|
11
|
-
c
|
12
|
-
d
|
13
|
-
d
|
14
|
-
d
|
15
|
-
e
|
16
|
-
f
|
17
|
-
h
|
18
|
-
k
|
19
|
-
m
|
20
|
-
m
|
21
|
-
m
|
22
|
-
n
|
23
|
-
n
|
24
|
-
n
|
25
|
-
n
|
26
|
-
n
|
27
|
-
n
|
28
|
-
n
|
29
|
-
n
|
30
|
-
n
|
31
|
-
n
|
32
|
-
n
|
33
|
-
n
|
34
|
-
o
|
35
|
-
p
|
36
|
-
p
|
37
|
-
p
|
38
|
-
q
|
39
|
-
q
|
40
|
-
q
|
41
|
-
r
|
42
|
-
R
|
43
|
-
r
|
44
|
-
r
|
45
|
-
r
|
46
|
-
r
|
47
|
-
r
|
48
|
-
r
|
49
|
-
r
|
50
|
-
r
|
51
|
-
r
|
52
|
-
s
|
53
|
-
t
|
54
|
-
t
|
55
|
-
u
|
56
|
-
u
|
57
|
-
u
|
58
|
-
u
|
59
|
-
u
|
60
|
-
u
|
61
|
-
u
|
62
|
-
u
|
63
|
-
u
|
64
|
-
u
|
65
|
-
u
|
66
|
-
u
|
67
|
-
u
|
68
|
-
u
|
69
|
-
v
|
70
|
-
v
|
71
|
-
v
|
72
|
-
v
|
73
|
-
v
|
74
|
-
v
|
75
|
-
v
|
76
|
-
v
|
77
|
-
v
|
78
|
-
v
|
79
|
-
w
|
80
|
-
w
|
81
|
-
w
|
82
|
-
w
|
83
|
-
w
|
84
|
-
w
|
85
|
-
w
|
86
|
-
w
|
87
|
-
w
|
88
|
-
w
|
89
|
-
w
|
90
|
-
w
|
91
|
-
w
|
92
|
-
w
|
93
|
-
w
|
94
|
-
x
|
95
|
-
y
|
96
|
-
z
|
1
|
+
1
|
2
|
+
4
|
3
|
+
a
|
4
|
+
a
|
5
|
+
a
|
6
|
+
a
|
7
|
+
a
|
8
|
+
b
|
9
|
+
b
|
10
|
+
c
|
11
|
+
c
|
12
|
+
d
|
13
|
+
d
|
14
|
+
d
|
15
|
+
e
|
16
|
+
f
|
17
|
+
h
|
18
|
+
k
|
19
|
+
m
|
20
|
+
m
|
21
|
+
m
|
22
|
+
n
|
23
|
+
n
|
24
|
+
n
|
25
|
+
n
|
26
|
+
n
|
27
|
+
n
|
28
|
+
n
|
29
|
+
n
|
30
|
+
n
|
31
|
+
n
|
32
|
+
n
|
33
|
+
n
|
34
|
+
o
|
35
|
+
p
|
36
|
+
p
|
37
|
+
p
|
38
|
+
q
|
39
|
+
q
|
40
|
+
q
|
41
|
+
r
|
42
|
+
R
|
43
|
+
r
|
44
|
+
r
|
45
|
+
r
|
46
|
+
r
|
47
|
+
r
|
48
|
+
r
|
49
|
+
r
|
50
|
+
r
|
51
|
+
r
|
52
|
+
s
|
53
|
+
t
|
54
|
+
t
|
55
|
+
u
|
56
|
+
u
|
57
|
+
u
|
58
|
+
u
|
59
|
+
u
|
60
|
+
u
|
61
|
+
u
|
62
|
+
u
|
63
|
+
u
|
64
|
+
u
|
65
|
+
u
|
66
|
+
u
|
67
|
+
u
|
68
|
+
u
|
69
|
+
v
|
70
|
+
v
|
71
|
+
v
|
72
|
+
v
|
73
|
+
v
|
74
|
+
v
|
75
|
+
v
|
76
|
+
v
|
77
|
+
v
|
78
|
+
v
|
79
|
+
w
|
80
|
+
w
|
81
|
+
w
|
82
|
+
w
|
83
|
+
w
|
84
|
+
w
|
85
|
+
w
|
86
|
+
w
|
87
|
+
w
|
88
|
+
w
|
89
|
+
w
|
90
|
+
w
|
91
|
+
w
|
92
|
+
w
|
93
|
+
w
|
94
|
+
x
|
95
|
+
y
|
96
|
+
z
|