vn_tagger 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +28 -0
- data/Gemfile +5 -0
- data/LICENSE.txt +22 -0
- data/README.md +34 -0
- data/Rakefile +2 -0
- data/VN_TAGGER LICENSE.txt +5 -0
- data/lib/commons-cli-1.2.jar +0 -0
- data/lib/edu.stanford.nlp.tagger-2.0.jar +0 -0
- data/lib/vn.hus.nlp.fsm-1.0.0.jar +0 -0
- data/lib/vn.hus.nlp.tokenizer-4.1.1.jar +0 -0
- data/lib/vn.hus.nlp.utils-1.0.0.jar +0 -0
- data/lib/vn_tagger.rb +7 -0
- data/lib/vn_tagger/tagger.rb +44 -0
- data/lib/vn_tagger/version.rb +3 -0
- data/resources/automata/dfaLexicon.xml +123032 -0
- data/resources/automata/externalLexicon.xml +7 -0
- data/resources/bigram/bigram.xml +211658 -0
- data/resources/bigram/unigram.xml +24309 -0
- data/resources/lexers/lexers.xml +68 -0
- data/resources/models/vtb.tagger +0 -0
- data/resources/models/vtb.tagger.props +36 -0
- data/resources/normalization/rules.txt +15 -0
- data/resources/prefix/namedEntityPrefix.xml +34 -0
- data/spec/spec_helper.rb +9 -0
- data/spec/vn_tagger/tagger_spec.rb +14 -0
- data/spec/vn_tagger_spec.rb +6 -0
- data/vn.hus.nlp.tagger-4.2.0.jar +0 -0
- data/vnTagger.sh +28 -0
- data/vn_tagger.gemspec +25 -0
- metadata +134 -0
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
<?xml version="1.0" encoding="utf-8" standalone="yes"?>
|
|
2
|
+
<!-- (C) phuonglh@gmail.com -->
|
|
3
|
+
<corpus id="resources/lexers/lexers.xml">
|
|
4
|
+
<body>
|
|
5
|
+
<w msd="numbersign">#</w>
|
|
6
|
+
<!-- literal ampersand token; written as an entity so the document stays well-formed -->
<w msd="ampersand">&amp;</w>
|
|
7
|
+
<w msd="date_mm-dd-yy">(0*[1-9]|1[012])-(0*[1-9]|[12][0-9]|3[01])-\d\d</w>
|
|
8
|
+
<w msd="date_mm/dd/yy">(0*[1-9]|1[012])/(0*[1-9]|[12][0-9]|3[01])/\d\d</w>
|
|
9
|
+
<w msd="date_mm.dd.yy">(0*[1-9]|1[012])\.(0*[1-9]|[12][0-9]|3[01])\.\d\d</w>
|
|
10
|
+
<!-- day, month, two-digit year separated by '-'; the month group has no empty branch, so a month is required -->
<w msd="date_dd-mm-yy">([12][0-9]|3[01]|0*[1-9])-(1[012]|0*[1-9])-\d\d</w>
|
|
11
|
+
<!-- day, month, two-digit year separated by '/'; the month group has no empty branch, so a month is required -->
<w msd="date_dd/mm/yy">([12][0-9]|3[01]|0*[1-9])/(1[012]|0*[1-9])/\d\d</w>
|
|
12
|
+
<!-- day, month, two-digit year separated by '.'; the month group has no empty branch, so a month is required -->
<w msd="date_dd.mm.yy">([12][0-9]|3[01]|0*[1-9])[\.](1[012]|0*[1-9])[\.]\d\d</w>
|
|
13
|
+
<!-- day, month, four-digit year (19xx or 20xx) separated by '-'; the month group has no empty branch, so a month is required -->
<w msd="date_dd-mm-yyyy">([12][0-9]|3[01]|0*[1-9])-(1[012]|0*[1-9])-(19|20)\d\d</w>
|
|
14
|
+
<!-- day, month, four-digit year (19xx or 20xx) separated by '/'; the month group has no empty branch, so a month is required -->
<w msd="date_dd/mm/yyyy">([12][0-9]|3[01]|0*[1-9])/(1[012]|0*[1-9])/(19|20)\d\d</w>
|
|
15
|
+
<!-- day, month, four-digit year (19xx or 20xx) separated by '.'; the month group has no empty branch, so a month is required -->
<w msd="date_dd.mm.yyyy">([12][0-9]|3[01]|0*[1-9])\.(1[012]|0*[1-9])\.(19|20)\d\d</w>
|
|
16
|
+
<w msd="date_dd-mm">(0*[1-9]|[12][0-9]|3[01])[-/\.](1[012]|0*[1-9])</w>
|
|
17
|
+
<w msd="date_mm-yy">(0*[1-9]|1[012])[-/\.]\d\d</w>
|
|
18
|
+
<w msd="date_mm-yyyy">(0*[1-9]|1[012])[-/\.](19|20)\d\d</w>
|
|
19
|
+
<w msd="date_yyyy">(19|20)\d\d</w>
|
|
20
|
+
<w msd="date_mm-dd-yyyy">(0*[1-9]|1[012])-([12][0-9]|3[01]|0*[1-9])-(19|20)\d\d</w>
|
|
21
|
+
<w msd="date_mm/dd/yyyy">(0*[1-9]|1[012])/([12][0-9]|3[01]|0*[1-9])/(19|20)\d\d</w>
|
|
22
|
+
<w msd="date_mm.dd.yyyy">(0*[1-9]|1[012])\.([12][0-9]|3[01]|0*[1-9])\.(19|20)\d\d</w>
|
|
23
|
+
<w msd="date_yyyy-mm-dd">(19|20)\d\d-(0*[1-9]|1[012])-([12][0-9]|3[01]|0*[1-9])</w>
|
|
24
|
+
<w msd="date_yyyy/mm/dd">(19|20)\d\d/(0*[1-9]|1[012])/([12][0-9]|3[01]|0*[1-9])</w>
|
|
25
|
+
<w msd="date_yyyy.mm.dd">(19|20)\d\d\.(0*[1-9]|1[012])\.([12][0-9]|3[01]|0*[1-9])</w>
|
|
26
|
+
<w msd="hhmmss">([0-1]\d|[2][0-3]):[0-5]\d:[0-5]\d</w>
|
|
27
|
+
<w msd="percent">([0-9]*[\.,])?[0-9]+%</w>
|
|
28
|
+
<w msd="name1">[A-ZÁÂĐÍÔƯỨÝỶ][aàảãáạăằẳẵắặâầẩẫấậbcdđeèẻẽéẹêềểễếệfghiìỉĩíịjklmnoòỏõóọôồổỗốộơờởỡớợpqrstuùủũúụưừửữứựvwxyỳỷỹýỵz]*</w>
|
|
29
|
+
<w msd="name2">([A-ZÁÂĐÍÔƯỨÝỶ][aàảãáạăằẳẵắặâầẩẫấậbcdđeèẻẽéẹêềểễếệfghiìỉĩíịjklmnoòỏõóọôồổỗốộơờởỡớợpqrstuùủũúụưừửữứựvwxyỳỷỹýỵz]*)(\s+[A-ZÁÂĐÍÔƯỨÝỶ][aàảãáạăằẳẵắặâầẩẫấậbcdđeèẻẽéẹêềểễếệfghiìỉĩíịjklmnoòỏõóọôồổỗốộơờởỡớợpqrstuùủũúụưừửữứựvwxyỳỷỹýỵz]+)*</w>
|
|
30
|
+
<w msd="phrase">([AÀẢÃÁẠĂẰẲẴẮẶÂẦẨẪẤẬBCDĐEÈẺẼÉẸÊỀỂỄẾỆFGHIÌỈĨÍỊJKLMNOÒỎÕÓỌÔỒỔỖỐỘƠỜỞỠỚỢPQRSTUÙỦŨÚỤƯỪỬỮỨỰVWXYỲỶỸÝỴZ])?([aàảãáạăằẳẵắặâầẩẫấậbcdđeèẻẽéẹêềểễếệfghiìỉĩíịjklmnoòỏõóọôồổỗốộơờởỡớợpqrstuùủũúụưừửữứựvwxyỳỷỹýỵz\s])*([aàảãáạăằẳẵắặâầẩẫấậbcdđeèẻẽéẹêềểễếệfghiìỉĩíịjklmnoòỏõóọôồổỗốộơờởỡớợpqrstuùủũúụưừửữứựvwxyỳỷỹýỵz])+$*</w>
|
|
31
|
+
<w msd="allcaps">([AÀẢÃÁẠĂẰẲẴẮẶÂẦẨẪẤẬBCDĐEÈẺẼÉẸÊỀỂỄẾỆFGHIÌỈĨÍỊJKLMNOÒỎÕÓỌÔỒỔỖỐỘƠỜỞỠỚỢPQRSTUÙỦŨÚỤƯỪỬỮỨỰVWXYỲỶỸÝỴZ]+)(\s*[AÀẢÃÁẠĂẰẲẴẮẶÂẦẨẪẤẬBCDĐEÈẺẼÉẸÊỀỂỄẾỆFGHIÌỈĨÍỊJKLMNOÒỎÕÓỌÔỒỔỖỐỘƠỜỞỠỚỢPQRSTUÙỦŨÚỤƯỪỬỮỨỰVWXYỲỶỸÝỴZ]+)*[^aàảãáạăằẳẵắặâầẩẫấậbcdđeèẻẽéẹêềểễếệfghiìỉĩíịjklmnoòỏõóọôồổỗốộơờởỡớợpqrstuùủũúụưừửữứựvwxyỳỷỹýỵz/\)\(\?!\.;:,\-"']</w>
|
|
32
|
+
<w msd="fraction">(\d+)/(\d+)</w>
|
|
33
|
+
<w msd="email">(\w[-._\w]*\w@\w[-._\w]*\w\.\w{2,3})</w>
|
|
34
|
+
<w msd="return">(^$)</w>
|
|
35
|
+
<w msd="fslash">/</w>
|
|
36
|
+
<!-- literal left angle bracket token; escaped because a raw '<' is not allowed in XML text -->
<w msd="langle">&lt;</w>
|
|
37
|
+
<!-- pattern: '<', any number of '/', any number of word characters, '>'; the angle brackets are escaped for XML -->
<w msd="xmltags">&lt;/*\w*&gt;</w>
|
|
38
|
+
<w msd="equal">=</w>
|
|
39
|
+
<w msd="rangle">></w>
|
|
40
|
+
<w msd="aroba">@</w>
|
|
41
|
+
<w msd="number1">[+]?([0-9]*)?[0-9]+([\.,]\d+)*</w>
|
|
42
|
+
<!--
|
|
43
|
+
<w msd="number2">[+]?([0-9]*)?[0-9]+([\.,]\d+)*(\s|tỉ|tỷ|triệu|ngàn|nghìn|trăm|chục)*</w>
|
|
44
|
+
-->
|
|
45
|
+
<w msd="degree">[-+]?([0-9]*[\.,])?[0-9]+°</w>
|
|
46
|
+
<w msd="ponctuation">[\\?!\\.:;,\-"']</w>
|
|
47
|
+
<w msd="dollar">\$</w>
|
|
48
|
+
<w msd="lparen">\(</w>
|
|
49
|
+
<w msd="rparen">\)</w>
|
|
50
|
+
<w msd="asterisk">\*</w>
|
|
51
|
+
<w msd="plus">\+</w>
|
|
52
|
+
<w msd="minus">\-</w>
|
|
53
|
+
<w msd="ellipsis">\.\.\.</w>
|
|
54
|
+
<w msd="residual">\W</w>
|
|
55
|
+
<w msd="lbracket">\[</w>
|
|
56
|
+
<w msd="bslash">\\</w>
|
|
57
|
+
<w msd="rbracket">\]</w>
|
|
58
|
+
<w msd="entity0">\d+([\.,]\d+)*[A-Zaàảãáạăằẳẵắặâầẩẫấậbcdđeèẻẽéẹêềểễếệfghiìỉĩíịjklmnoòỏõóọôồổỗốộơờởỡớợpqrstuùủũúụưừửữứựvwxyỳỷỹýỵz]+\d+$</w>
|
|
59
|
+
<w msd="entity1">[A-Zaàảãáạăằẳẵắặâầẩẫấậbcdđeèẻẽéẹêềểễếệfghiìỉĩíịjklmnoòỏõóọôồổỗốộơờởỡớợpqrstuùủũúụưừửữứựvwxyỳỷỹýỵz]+(\d)*$</w>
|
|
60
|
+
<w msd="entity2">[A-Zaàảãáạăằẳẵắặâầẩẫấậbcdđeèẻẽéẹêềểễếệfghiìỉĩíịjklmnoòỏõóọôồổỗốộơờởỡớợpqrstuùủũúụưừửữứựvwxyỳỷỹýỵz\d]+([\.\-/][\dA-Zaàảãáạăằẳẵắặâầẩẫấậbcdđeèẻẽéẹêềểễếệfghiìỉĩíịjklmnoòỏõóọôồổỗốộơờởỡớợpqrstuùủũúụưừửữứựvwxyỳỷỹýỵz]+)*[\dA-Zaàảãáạăằẳẵắặâầẩẫấậbcdđeèẻẽéẹêềểễếệfghiìỉĩíịjklmnoòỏõóọôồổỗốộơờởỡớợpqrstuùủũúụưừửữứựvwxyỳỷỹýỵz]+</w>
|
|
61
|
+
<w msd="space">\s+</w>
|
|
62
|
+
<w msd="word">\w</w>
|
|
63
|
+
<w msd="lcbrace">\{</w>
|
|
64
|
+
<w msd="rcbrace">\}</w>
|
|
65
|
+
<w msd="underscore">_</w>
|
|
66
|
+
<w msd="pound">£</w>
|
|
67
|
+
</body>
|
|
68
|
+
</corpus>
|
|
Binary file
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
## tagger training invoked at Thu Aug 05 14:33:50 CEST 2010 with arguments:
|
|
2
|
+
model = resources/models/vtb.tagger
|
|
3
|
+
arch = left5words,vietnameseunknowns
|
|
4
|
+
trainFile = data/vtb-20091030.tagged.txt
|
|
5
|
+
closedClassTags =
|
|
6
|
+
closedClassTagThreshold = 40
|
|
7
|
+
curWordMinFeatureThresh = 2
|
|
8
|
+
debug = true
|
|
9
|
+
debugPrefix =
|
|
10
|
+
tagSeparator = /
|
|
11
|
+
encoding = UTF-8
|
|
12
|
+
initFromTrees = false
|
|
13
|
+
iterations = 100
|
|
14
|
+
lang = vietnamese
|
|
15
|
+
learnClosedClassTags = false
|
|
16
|
+
minFeatureThresh = 2
|
|
17
|
+
openClassTags =
|
|
18
|
+
rareWordMinFeatureThresh = 10
|
|
19
|
+
rareWordThresh = 6
|
|
20
|
+
search = qn
|
|
21
|
+
sgml = false
|
|
22
|
+
sigmaSquared = 0.5
|
|
23
|
+
regL1 = 1.0
|
|
24
|
+
tagInside =
|
|
25
|
+
tokenize = false
|
|
26
|
+
tokenizerFactory =
|
|
27
|
+
tokenizerOptions =
|
|
28
|
+
treeRange =
|
|
29
|
+
treeNormalizer =
|
|
30
|
+
treeTransformer =
|
|
31
|
+
verbose = false
|
|
32
|
+
veryCommonWordThresh = 50
|
|
33
|
+
xmlInput =
|
|
34
|
+
outputFile =
|
|
35
|
+
outputFormat = slashTags
|
|
36
|
+
outputFormatOptions =
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
<?xml version="1.0" encoding="utf-8" standalone="yes"?>
|
|
2
|
+
<corpus id="resources/prefix/namedEntityPrefix.xml">
|
|
3
|
+
<body>
|
|
4
|
+
<!-- classificator prefix -->
|
|
5
|
+
<w>ông</w>
|
|
6
|
+
<w>bà</w>
|
|
7
|
+
<w>bác</w>
|
|
8
|
+
<w>chú</w>
|
|
9
|
+
<w>cô</w>
|
|
10
|
+
<w>thím</w>
|
|
11
|
+
<w>dì</w>
|
|
12
|
+
<w>cậu</w>
|
|
13
|
+
<w>cụ</w>
|
|
14
|
+
<w>mợ</w>
|
|
15
|
+
<w>ngài</w>
|
|
16
|
+
<w>anh</w>
|
|
17
|
+
<w>chị</w>
|
|
18
|
+
<w>thằng</w>
|
|
19
|
+
<w>cái</w>
|
|
20
|
+
<w>vua</w>
|
|
21
|
+
<!-- organization prefix -->
|
|
22
|
+
<w>tỉnh</w>
|
|
23
|
+
<w>huyện</w>
|
|
24
|
+
<w>xã</w>
|
|
25
|
+
<w>phường</w>
|
|
26
|
+
<w>bộ</w>
|
|
27
|
+
<w>sở</w>
|
|
28
|
+
<w>cục</w>
|
|
29
|
+
<w>ban</w>
|
|
30
|
+
<w>ngành</w>
|
|
31
|
+
<!-- Other prefix -->
|
|
32
|
+
<w>theo</w>
|
|
33
|
+
</body>
|
|
34
|
+
</corpus>
|
data/spec/spec_helper.rb
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
require 'byebug'
|
|
3
|
+
|
|
4
|
+
describe VnTagger::Tagger do
|
|
5
|
+
describe '#tag' do
|
|
6
|
+
let(:text) { 'HLV cùa Chelsea không hối tiếc vì hành động bỏ về sớm trong trận gặp Aston Villa.' }
|
|
7
|
+
let(:tagger) { described_class.new(text) }
|
|
8
|
+
let(:result) { tagger.tag }
|
|
9
|
+
|
|
10
|
+
it 'returns xml tagged text' do
|
|
11
|
+
expect(result).to be_a(Nokogiri::XML::Document)
|
|
12
|
+
end
|
|
13
|
+
end
|
|
14
|
+
end
|
|
Binary file
|
data/vnTagger.sh
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
#!/bin/sh
|
|
2
|
+
|
|
3
|
+
# The main program to run
|
|
4
|
+
# Resolve the jar next to this script. BASH_SOURCE only exists in bash, so fall
# back to $0 for other /bin/sh implementations; dirname also handles a path
# that contains no slash.
PROGRAM="$(dirname -- "${BASH_SOURCE:-$0}")/vn.hus.nlp.tagger-4.2.0.jar"
|
|
5
|
+
|
|
6
|
+
# Get the java command
|
|
7
|
+
#
|
|
8
|
+
if [ -z "$JAVACMD" ] ; then
|
|
9
|
+
if [ -n "$JAVA_HOME" ] ; then
|
|
10
|
+
if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
|
|
11
|
+
JAVACMD="$JAVA_HOME/jre/sh/java"
|
|
12
|
+
else
|
|
13
|
+
JAVACMD="$JAVA_HOME/bin/java"
|
|
14
|
+
fi
|
|
15
|
+
else
|
|
16
|
+
JAVACMD=`which java 2> /dev/null`
|
|
17
|
+
if [ -z "$JAVACMD" ] ; then
|
|
18
|
+
JAVACMD=java
|
|
19
|
+
fi
|
|
20
|
+
fi
|
|
21
|
+
fi
|
|
22
|
+
|
|
23
|
+
# Run the programme
|
|
24
|
+
#
|
|
25
|
+
# Quote every expansion so paths and arguments containing spaces reach the JVM
# as single words; "$@" forwards the caller's arguments unchanged.
"$JAVACMD" -mx500m -jar "$PROGRAM" "$@"
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
|
data/vn_tagger.gemspec
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# coding: utf-8
|
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
4
|
+
require 'vn_tagger/version'
|
|
5
|
+
|
|
6
|
+
Gem::Specification.new do |spec|
|
|
7
|
+
spec.name = "vn_tagger"
|
|
8
|
+
spec.version = VnTagger::VERSION
|
|
9
|
+
spec.authors = ["Hieu Nguyen"]
|
|
10
|
+
spec.email = ["hieuk09@gmail.com"]
|
|
11
|
+
# One-line summary shown in gem listings.
spec.summary = %q{This is a wrapper for vn_tagger library, a POS tagger for Vietnamese texts.}
|
|
12
|
+
# Longer description published with the gem metadata.
spec.description = %q{This is a wrapper for vn_tagger library, a POS tagger for Vietnamese texts.}
|
|
13
|
+
spec.homepage = ""
|
|
14
|
+
spec.license = "MIT"
|
|
15
|
+
|
|
16
|
+
spec.files = `git ls-files -z`.split("\x0")
|
|
17
|
+
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
|
18
|
+
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
|
19
|
+
spec.require_paths = ["lib", 'lib/vn_tagger']
|
|
20
|
+
|
|
21
|
+
spec.add_development_dependency "bundler", "~> 1.6"
|
|
22
|
+
spec.add_development_dependency "rake"
|
|
23
|
+
spec.add_development_dependency "rspec"
|
|
24
|
+
spec.add_development_dependency "byebug"
|
|
25
|
+
end
|
metadata
ADDED
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: vn_tagger
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.0.4
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Hieu Nguyen
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: bin
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2014-12-17 00:00:00.000000000 Z
|
|
12
|
+
dependencies:
|
|
13
|
+
- !ruby/object:Gem::Dependency
|
|
14
|
+
name: bundler
|
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
|
16
|
+
requirements:
|
|
17
|
+
- - "~>"
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: '1.6'
|
|
20
|
+
type: :development
|
|
21
|
+
prerelease: false
|
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
+
requirements:
|
|
24
|
+
- - "~>"
|
|
25
|
+
- !ruby/object:Gem::Version
|
|
26
|
+
version: '1.6'
|
|
27
|
+
- !ruby/object:Gem::Dependency
|
|
28
|
+
name: rake
|
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
|
30
|
+
requirements:
|
|
31
|
+
- - ">="
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: '0'
|
|
34
|
+
type: :development
|
|
35
|
+
prerelease: false
|
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
37
|
+
requirements:
|
|
38
|
+
- - ">="
|
|
39
|
+
- !ruby/object:Gem::Version
|
|
40
|
+
version: '0'
|
|
41
|
+
- !ruby/object:Gem::Dependency
|
|
42
|
+
name: rspec
|
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
|
44
|
+
requirements:
|
|
45
|
+
- - ">="
|
|
46
|
+
- !ruby/object:Gem::Version
|
|
47
|
+
version: '0'
|
|
48
|
+
type: :development
|
|
49
|
+
prerelease: false
|
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
51
|
+
requirements:
|
|
52
|
+
- - ">="
|
|
53
|
+
- !ruby/object:Gem::Version
|
|
54
|
+
version: '0'
|
|
55
|
+
- !ruby/object:Gem::Dependency
|
|
56
|
+
name: byebug
|
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
|
58
|
+
requirements:
|
|
59
|
+
- - ">="
|
|
60
|
+
- !ruby/object:Gem::Version
|
|
61
|
+
version: '0'
|
|
62
|
+
type: :development
|
|
63
|
+
prerelease: false
|
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
65
|
+
requirements:
|
|
66
|
+
- - ">="
|
|
67
|
+
- !ruby/object:Gem::Version
|
|
68
|
+
version: '0'
|
|
69
|
+
description: 'This is a wrapper for vn_tagger library, a A POS tagger for Vietnamese
|
|
70
|
+
texts.'' '
|
|
71
|
+
email:
|
|
72
|
+
- hieuk09@gmail.com
|
|
73
|
+
executables: []
|
|
74
|
+
extensions: []
|
|
75
|
+
extra_rdoc_files: []
|
|
76
|
+
files:
|
|
77
|
+
- ".gitignore"
|
|
78
|
+
- Gemfile
|
|
79
|
+
- LICENSE.txt
|
|
80
|
+
- README.md
|
|
81
|
+
- Rakefile
|
|
82
|
+
- VN_TAGGER LICENSE.txt
|
|
83
|
+
- lib/commons-cli-1.2.jar
|
|
84
|
+
- lib/edu.stanford.nlp.tagger-2.0.jar
|
|
85
|
+
- lib/vn.hus.nlp.fsm-1.0.0.jar
|
|
86
|
+
- lib/vn.hus.nlp.tokenizer-4.1.1.jar
|
|
87
|
+
- lib/vn.hus.nlp.utils-1.0.0.jar
|
|
88
|
+
- lib/vn_tagger.rb
|
|
89
|
+
- lib/vn_tagger/tagger.rb
|
|
90
|
+
- lib/vn_tagger/version.rb
|
|
91
|
+
- resources/automata/dfaLexicon.xml
|
|
92
|
+
- resources/automata/externalLexicon.xml
|
|
93
|
+
- resources/bigram/bigram.xml
|
|
94
|
+
- resources/bigram/unigram.xml
|
|
95
|
+
- resources/lexers/lexers.xml
|
|
96
|
+
- resources/models/vtb.tagger
|
|
97
|
+
- resources/models/vtb.tagger.props
|
|
98
|
+
- resources/normalization/rules.txt
|
|
99
|
+
- resources/prefix/namedEntityPrefix.xml
|
|
100
|
+
- spec/spec_helper.rb
|
|
101
|
+
- spec/vn_tagger/tagger_spec.rb
|
|
102
|
+
- spec/vn_tagger_spec.rb
|
|
103
|
+
- vn.hus.nlp.tagger-4.2.0.jar
|
|
104
|
+
- vnTagger.sh
|
|
105
|
+
- vn_tagger.gemspec
|
|
106
|
+
homepage: ''
|
|
107
|
+
licenses:
|
|
108
|
+
- MIT
|
|
109
|
+
metadata: {}
|
|
110
|
+
post_install_message:
|
|
111
|
+
rdoc_options: []
|
|
112
|
+
require_paths:
|
|
113
|
+
- lib
|
|
114
|
+
- lib/vn_tagger
|
|
115
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
116
|
+
requirements:
|
|
117
|
+
- - ">="
|
|
118
|
+
- !ruby/object:Gem::Version
|
|
119
|
+
version: '0'
|
|
120
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
121
|
+
requirements:
|
|
122
|
+
- - ">="
|
|
123
|
+
- !ruby/object:Gem::Version
|
|
124
|
+
version: '0'
|
|
125
|
+
requirements: []
|
|
126
|
+
rubyforge_project:
|
|
127
|
+
rubygems_version: 2.4.3
|
|
128
|
+
signing_key:
|
|
129
|
+
specification_version: 4
|
|
130
|
+
summary: This is a wrapper for vn_tagger library, a A POS tagger for Vietnamese texts.'
|
|
131
|
+
test_files:
|
|
132
|
+
- spec/spec_helper.rb
|
|
133
|
+
- spec/vn_tagger/tagger_spec.rb
|
|
134
|
+
- spec/vn_tagger_spec.rb
|