nlpir 0.0.3-x86-linux → 0.1.0-x86-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +22 -23
- data/lib/Data/FieldDict.pdat +0 -0
- data/lib/Data/FieldDict.pos +0 -0
- data/lib/Data/ICTPOS.map +96 -96
- data/lib/Data/NLPIR.user +0 -0
- data/lib/Data/NLPIR_First.map +96 -96
- data/lib/Data/NewWord.lst +2 -25
- data/lib/Data/PKU.map +96 -96
- data/lib/Data/PKU_First.map +96 -96
- data/lib/Data/UserDict.pdat +0 -0
- data/lib/nlpir/version.rb +1 -1
- data/test/Data/FieldDict.pdat +0 -0
- data/test/Data/FieldDict.pos +0 -0
- data/test/Data/ICTPOS.map +96 -96
- data/test/Data/NLPIR.user +0 -0
- data/test/Data/NLPIR_First.map +96 -96
- data/test/Data/NewWord.lst +2 -40
- data/test/Data/PKU.map +96 -96
- data/test/Data/PKU_First.map +96 -96
- data/test/test_nlpir.rb +5 -2
- data/test/userdict.txt +5 -5
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1b4618aa4720763f75ad927ed54c335c33811622
|
4
|
+
data.tar.gz: af4693b310257df825852afc4362e68cc5734347
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3b54252b4f738854a1896a04643612f66f07e22e0309fc19a6a13c680213b8b56a226a5f55a9bbe575854ca035c77239f432cfb031d1d72f367a543ae5b22a9a
|
7
|
+
data.tar.gz: 4d73ceefb3db7dba3ded885fa0e7dcdeadcdf2b78fb3932147ef389244af759f0d41988450df6b192fec63a4d4d4fa1b3af454ed975ce1b69bd72b8c39b6d5da
|
data/README.md
CHANGED
@@ -18,7 +18,7 @@ Or install it yourself as:
|
|
18
18
|
|
19
19
|
## Usage
|
20
20
|
|
21
|
-
Nlpir version 0.0.
|
21
|
+
Nlpir version 0.0.3 supports only *nix operating systems; we'll release it for the Windows platform in a few days.
|
22
22
|
|
23
23
|
some DEFINE you may use :
|
24
24
|
```ruby
|
@@ -27,9 +27,9 @@ some DEFINE you may use :
|
|
27
27
|
NLPIR_TRUE = 1
|
28
28
|
POS_MAP_NUMBER = 4
|
29
29
|
ICT_POS_MAP_FIRST = 1 #计算所一级标注集
|
30
|
-
ICT_POS_MAP_SECOND = 0
|
31
|
-
PKU_POS_MAP_SECOND = 2
|
32
|
-
PKU_POS_MAP_FIRST = 3
|
30
|
+
ICT_POS_MAP_SECOND = 0 #计算所二级标注集
|
31
|
+
PKU_POS_MAP_SECOND = 2 #北大二级标注集
|
32
|
+
PKU_POS_MAP_FIRST = 3 #北大一级标注集
|
33
33
|
POS_SIZE = 40
|
34
34
|
|
35
35
|
Result_t = struct ['int start','int length',"char sPOS[#{POS_SIZE}]",'int iPOS',
|
@@ -51,25 +51,14 @@ after you gem install it:
|
|
51
51
|
|
52
52
|
s = "坚定不移沿着中国特色社会主义道路前进 为全面建成小康社会而奋斗"
|
53
53
|
#first of all : Call the NLPIR API NLPIR_Init
|
54
|
-
|
55
|
-
|
56
|
-
end
|
57
|
-
|
58
|
-
#example1: Import user-defined dictionary from a text file. and puts NLPIR result
|
59
|
-
puts NLPIR_ParagraphProcess("1989年春夏之交的政治风波1989年政治风波24小时降雪量24小时降雨量863计划ABC防护训练APEC会议BB机BP机C2系统C3I系统C3系统C4ISR系统C4I系统CCITT建议")
|
60
|
-
puts NLPIR_ImportUserDict("./userdict.txt")
|
61
|
-
NLPIR_AddUserWord("1989年春夏之交的政治风波 n")
|
62
|
-
#you can see the example file: ./userdict.txt to know the userdict`s format requirements
|
63
|
-
puts NLPIR_ParagraphProcess("1989年春夏之交的政治风波1989年政治风波24小时降雪量24小时降雨量863计划ABC防护训练APEC会议BB机BP机C2系统C3I系统C3系统C4ISR系统C4I系统CCITT建议")
|
64
|
-
NLPIR_DelUsrWord("1989年春夏之交的政治风波")
|
65
|
-
puts NLPIR_ParagraphProcess("1989年春夏之交的政治风波1989年政治风波24小时降雪量24小时降雨量863计划ABC防护训练APEC会议BB机BP机C2系统C3I系统C3系统C4ISR系统C4I系统CCITT建议")
|
66
|
-
puts "\n"
|
54
|
+
|
55
|
+
NLPIR_Init(nil, UTF8_CODE , File.expand_path("../", __FILE__))
|
67
56
|
|
68
|
-
#
|
57
|
+
#example1: Process a paragraph, and return the result text with POS or not
|
69
58
|
puts NLPIR_ParagraphProcess(s, NLPIR_TRUE)
|
70
59
|
puts NLPIR_ParagraphProcess(s, NLPIR_FALSE)
|
71
60
|
|
72
|
-
#
|
61
|
+
#example2: Process a paragraph, and return an array whose elements are POS-tagged words.
|
73
62
|
#tips: NLPIR_ParagraphProcessA() returns the array; its memory is malloc'ed by NLPIR and will be freed by NLPIR_Exit() (memory in server)
|
74
63
|
|
75
64
|
words_list = NLPIR_ParagraphProcessA(s)
|
@@ -88,7 +77,7 @@ after you gem install it:
|
|
88
77
|
i += 1
|
89
78
|
end
|
90
79
|
|
91
|
-
#
|
80
|
+
#example3: Process a paragraph, and return an array whose elements are POS-tagged words.
|
92
81
|
#tips: NLPIR_ParagraphProcessAW() returns the array; its memory is malloc'ed by ruby::fiddle and is collected by the GC (memory in agent)
|
93
82
|
|
94
83
|
words_list = NLPIR_ParagraphProcessAW(s)
|
@@ -107,15 +96,15 @@ after you gem install it:
|
|
107
96
|
i += 1
|
108
97
|
end
|
109
98
|
|
110
|
-
#
|
99
|
+
#example4: Process a text file, and write the result text to a file
|
111
100
|
puts NLPIR_FileProcess("./test.txt", "./test_result.txt", NULL)
|
112
101
|
|
113
102
|
|
114
|
-
#
|
103
|
+
#example5: Get ProcessAWordCount, it returns the count of the words
|
115
104
|
puts count = NLPIR_GetParagraphProcessAWordCount(s)
|
116
105
|
|
117
106
|
|
118
|
-
#
|
107
|
+
#example6: Add/Delete a word to/from the user dictionary (the path of user dictionary is ./data/userdict.pdat)
|
119
108
|
puts NLPIR_ParagraphProcess("我们都是爱思客")
|
120
109
|
#add a user word
|
121
110
|
NLPIR_AddUserWord("都是爱思客 n")
|
@@ -133,6 +122,16 @@ after you gem install it:
|
|
133
122
|
NLPIR_SaveTheUsrDic()
|
134
123
|
puts NLPIR_ParagraphProcess("我们都是爱思客")
|
135
124
|
|
125
|
+
|
126
|
+
#example7: Import a user-defined dictionary from a text file, and print the NLPIR result
|
127
|
+
puts NLPIR_ParagraphProcess("1989年春夏之交的政治风波1989年政治风波24小时降雪量24小时降雨量863计划ABC防护训练APEC会议BB机BP机C2系统C3I系统C3系统C4ISR系统C4I系统CCITT建议")
|
128
|
+
puts NLPIR_ImportUserDict("./userdict.txt")
|
129
|
+
NLPIR_AddUserWord("1989年春夏之交的政治风波 n")
|
130
|
+
#see the example file ./userdict.txt for the user dictionary's format requirements
|
131
|
+
puts NLPIR_ParagraphProcess("1989年春夏之交的政治风波1989年政治风波24小时降雪量24小时降雨量863计划ABC防护训练APEC会议BB机BP机C2系统C3I系统C3系统C4ISR系统C4I系统CCITT建议")
|
132
|
+
NLPIR_DelUsrWord("1989年春夏之交的政治风波")
|
133
|
+
puts NLPIR_ParagraphProcess("1989年春夏之交的政治风波1989年政治风波24小时降雪量24小时降雨量863计划ABC防护训练APEC会议BB机BP机C2系统C3I系统C3系统C4ISR系统C4I系统CCITT建议")
|
134
|
+
puts "\n"
|
136
135
|
|
137
136
|
#example8: Get keywords of text
|
138
137
|
#2nd parameter is the MaxNumber of keywords
|
data/lib/Data/FieldDict.pdat
CHANGED
Binary file
|
data/lib/Data/FieldDict.pos
CHANGED
Binary file
|
data/lib/Data/ICTPOS.map
CHANGED
@@ -1,96 +1,96 @@
|
|
1
|
-
1
|
2
|
-
4
|
3
|
-
a
|
4
|
-
ad
|
5
|
-
ag
|
6
|
-
al
|
7
|
-
an
|
8
|
-
b
|
9
|
-
bl
|
10
|
-
c
|
11
|
-
cc
|
12
|
-
d
|
13
|
-
dg
|
14
|
-
dl
|
15
|
-
e
|
16
|
-
f
|
17
|
-
h
|
18
|
-
k
|
19
|
-
m
|
20
|
-
Mg
|
21
|
-
mq
|
22
|
-
n
|
23
|
-
ng
|
24
|
-
nl
|
25
|
-
nr
|
26
|
-
nr1
|
27
|
-
nr2
|
28
|
-
nrf
|
29
|
-
nrj
|
30
|
-
ns
|
31
|
-
nsf
|
32
|
-
nt
|
33
|
-
nz
|
34
|
-
o
|
35
|
-
p
|
36
|
-
pba
|
37
|
-
pbei
|
38
|
-
q
|
39
|
-
qt
|
40
|
-
qv
|
41
|
-
r
|
42
|
-
Rg
|
43
|
-
rr
|
44
|
-
ry
|
45
|
-
rys
|
46
|
-
ryt
|
47
|
-
ryv
|
48
|
-
rz
|
49
|
-
rzs
|
50
|
-
rzt
|
51
|
-
rzv
|
52
|
-
s
|
53
|
-
t
|
54
|
-
tg
|
55
|
-
u
|
56
|
-
ude1
|
57
|
-
ude2
|
58
|
-
ude3
|
59
|
-
udeng
|
60
|
-
udh
|
61
|
-
uguo
|
62
|
-
ule
|
63
|
-
ulian
|
64
|
-
uls
|
65
|
-
usuo
|
66
|
-
uyy
|
67
|
-
uzhe
|
68
|
-
uzhi
|
69
|
-
v
|
70
|
-
vd
|
71
|
-
vf
|
72
|
-
vg
|
73
|
-
vi
|
74
|
-
vl
|
75
|
-
vn
|
76
|
-
vshi
|
77
|
-
vx
|
78
|
-
vyou
|
79
|
-
w
|
80
|
-
wb
|
81
|
-
wd
|
82
|
-
wf
|
83
|
-
wj
|
84
|
-
wky
|
85
|
-
wkz
|
86
|
-
wm
|
87
|
-
wn
|
88
|
-
wp
|
89
|
-
ws
|
90
|
-
wt
|
91
|
-
ww
|
92
|
-
wyy
|
93
|
-
wyz
|
94
|
-
x
|
95
|
-
y
|
96
|
-
z
|
1
|
+
1
|
2
|
+
4
|
3
|
+
a
|
4
|
+
ad
|
5
|
+
ag
|
6
|
+
al
|
7
|
+
an
|
8
|
+
b
|
9
|
+
bl
|
10
|
+
c
|
11
|
+
cc
|
12
|
+
d
|
13
|
+
dg
|
14
|
+
dl
|
15
|
+
e
|
16
|
+
f
|
17
|
+
h
|
18
|
+
k
|
19
|
+
m
|
20
|
+
Mg
|
21
|
+
mq
|
22
|
+
n
|
23
|
+
ng
|
24
|
+
nl
|
25
|
+
nr
|
26
|
+
nr1
|
27
|
+
nr2
|
28
|
+
nrf
|
29
|
+
nrj
|
30
|
+
ns
|
31
|
+
nsf
|
32
|
+
nt
|
33
|
+
nz
|
34
|
+
o
|
35
|
+
p
|
36
|
+
pba
|
37
|
+
pbei
|
38
|
+
q
|
39
|
+
qt
|
40
|
+
qv
|
41
|
+
r
|
42
|
+
Rg
|
43
|
+
rr
|
44
|
+
ry
|
45
|
+
rys
|
46
|
+
ryt
|
47
|
+
ryv
|
48
|
+
rz
|
49
|
+
rzs
|
50
|
+
rzt
|
51
|
+
rzv
|
52
|
+
s
|
53
|
+
t
|
54
|
+
tg
|
55
|
+
u
|
56
|
+
ude1
|
57
|
+
ude2
|
58
|
+
ude3
|
59
|
+
udeng
|
60
|
+
udh
|
61
|
+
uguo
|
62
|
+
ule
|
63
|
+
ulian
|
64
|
+
uls
|
65
|
+
usuo
|
66
|
+
uyy
|
67
|
+
uzhe
|
68
|
+
uzhi
|
69
|
+
v
|
70
|
+
vd
|
71
|
+
vf
|
72
|
+
vg
|
73
|
+
vi
|
74
|
+
vl
|
75
|
+
vn
|
76
|
+
vshi
|
77
|
+
vx
|
78
|
+
vyou
|
79
|
+
w
|
80
|
+
wb
|
81
|
+
wd
|
82
|
+
wf
|
83
|
+
wj
|
84
|
+
wky
|
85
|
+
wkz
|
86
|
+
wm
|
87
|
+
wn
|
88
|
+
wp
|
89
|
+
ws
|
90
|
+
wt
|
91
|
+
ww
|
92
|
+
wyy
|
93
|
+
wyz
|
94
|
+
x
|
95
|
+
y
|
96
|
+
z
|
data/lib/Data/NLPIR.user
CHANGED
Binary file
|
data/lib/Data/NLPIR_First.map
CHANGED
@@ -1,96 +1,96 @@
|
|
1
|
-
1
|
2
|
-
4
|
3
|
-
a
|
4
|
-
a
|
5
|
-
a
|
6
|
-
a
|
7
|
-
a
|
8
|
-
b
|
9
|
-
b
|
10
|
-
c
|
11
|
-
c
|
12
|
-
d
|
13
|
-
d
|
14
|
-
d
|
15
|
-
e
|
16
|
-
f
|
17
|
-
h
|
18
|
-
k
|
19
|
-
m
|
20
|
-
m
|
21
|
-
m
|
22
|
-
n
|
23
|
-
n
|
24
|
-
n
|
25
|
-
n
|
26
|
-
n
|
27
|
-
n
|
28
|
-
n
|
29
|
-
n
|
30
|
-
n
|
31
|
-
n
|
32
|
-
n
|
33
|
-
n
|
34
|
-
o
|
35
|
-
p
|
36
|
-
p
|
37
|
-
p
|
38
|
-
q
|
39
|
-
q
|
40
|
-
q
|
41
|
-
r
|
42
|
-
R
|
43
|
-
r
|
44
|
-
r
|
45
|
-
r
|
46
|
-
r
|
47
|
-
r
|
48
|
-
r
|
49
|
-
r
|
50
|
-
r
|
51
|
-
r
|
52
|
-
s
|
53
|
-
t
|
54
|
-
t
|
55
|
-
u
|
56
|
-
u
|
57
|
-
u
|
58
|
-
u
|
59
|
-
u
|
60
|
-
u
|
61
|
-
u
|
62
|
-
u
|
63
|
-
u
|
64
|
-
u
|
65
|
-
u
|
66
|
-
u
|
67
|
-
u
|
68
|
-
u
|
69
|
-
v
|
70
|
-
v
|
71
|
-
v
|
72
|
-
v
|
73
|
-
v
|
74
|
-
v
|
75
|
-
v
|
76
|
-
v
|
77
|
-
v
|
78
|
-
v
|
79
|
-
w
|
80
|
-
w
|
81
|
-
w
|
82
|
-
w
|
83
|
-
w
|
84
|
-
w
|
85
|
-
w
|
86
|
-
w
|
87
|
-
w
|
88
|
-
w
|
89
|
-
w
|
90
|
-
w
|
91
|
-
w
|
92
|
-
w
|
93
|
-
w
|
94
|
-
x
|
95
|
-
y
|
96
|
-
z
|
1
|
+
1
|
2
|
+
4
|
3
|
+
a
|
4
|
+
a
|
5
|
+
a
|
6
|
+
a
|
7
|
+
a
|
8
|
+
b
|
9
|
+
b
|
10
|
+
c
|
11
|
+
c
|
12
|
+
d
|
13
|
+
d
|
14
|
+
d
|
15
|
+
e
|
16
|
+
f
|
17
|
+
h
|
18
|
+
k
|
19
|
+
m
|
20
|
+
m
|
21
|
+
m
|
22
|
+
n
|
23
|
+
n
|
24
|
+
n
|
25
|
+
n
|
26
|
+
n
|
27
|
+
n
|
28
|
+
n
|
29
|
+
n
|
30
|
+
n
|
31
|
+
n
|
32
|
+
n
|
33
|
+
n
|
34
|
+
o
|
35
|
+
p
|
36
|
+
p
|
37
|
+
p
|
38
|
+
q
|
39
|
+
q
|
40
|
+
q
|
41
|
+
r
|
42
|
+
R
|
43
|
+
r
|
44
|
+
r
|
45
|
+
r
|
46
|
+
r
|
47
|
+
r
|
48
|
+
r
|
49
|
+
r
|
50
|
+
r
|
51
|
+
r
|
52
|
+
s
|
53
|
+
t
|
54
|
+
t
|
55
|
+
u
|
56
|
+
u
|
57
|
+
u
|
58
|
+
u
|
59
|
+
u
|
60
|
+
u
|
61
|
+
u
|
62
|
+
u
|
63
|
+
u
|
64
|
+
u
|
65
|
+
u
|
66
|
+
u
|
67
|
+
u
|
68
|
+
u
|
69
|
+
v
|
70
|
+
v
|
71
|
+
v
|
72
|
+
v
|
73
|
+
v
|
74
|
+
v
|
75
|
+
v
|
76
|
+
v
|
77
|
+
v
|
78
|
+
v
|
79
|
+
w
|
80
|
+
w
|
81
|
+
w
|
82
|
+
w
|
83
|
+
w
|
84
|
+
w
|
85
|
+
w
|
86
|
+
w
|
87
|
+
w
|
88
|
+
w
|
89
|
+
w
|
90
|
+
w
|
91
|
+
w
|
92
|
+
w
|
93
|
+
w
|
94
|
+
x
|
95
|
+
y
|
96
|
+
z
|