opener-pos-tagger-base 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +110 -0
- data/bin/pos-tagger-base +21 -0
- data/core/mapping.postag.stss.to.opener.csv +52 -0
- data/core/mapping.postag.wotan.to.opener.csv +13 -0
- data/core/opennlp/bin/opennlp +35 -0
- data/core/opennlp/bin/opennlp.bat +35 -0
- data/core/opennlp/lib/jwnl-1.3.3.jar +0 -0
- data/core/opennlp/lib/opennlp-maxent-3.0.2-incubating.jar +0 -0
- data/core/opennlp/lib/opennlp-tools-1.5.2-incubating.jar +0 -0
- data/core/opennlp/lib/opennlp-uima-1.5.2-incubating.jar +0 -0
- data/core/opennlp/models/de-pos-maxent.bin +0 -0
- data/core/opennlp/models/de-pos-perceptron.bin +0 -0
- data/core/opennlp/models/nl-pos-maxent.bin +0 -0
- data/core/opennlp/models/nl-pos-perceptron.bin +0 -0
- data/core/pos-tagger_open-nlp.py +160 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/PKG-INFO +10 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/SOURCES.txt +7 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/dependency_links.txt +1 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/installed-files.txt +11 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/top_level.txt +1 -0
- data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.py +165 -0
- data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.pyc +0 -0
- data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.py +439 -0
- data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.pyc +0 -0
- data/core/site-packages/pre_build/VUKafParserPy/__init__.py +7 -0
- data/core/site-packages/pre_build/VUKafParserPy/__init__.pyc +0 -0
- data/core/token_matcher.py +80 -0
- data/ext/hack/support.rb +38 -0
- data/lib/opener/pos_taggers/base.rb +90 -0
- data/lib/opener/pos_taggers/base/version.rb +7 -0
- data/opener-pos-tagger-base.gemspec +29 -0
- data/pre_build_requirements.txt +1 -0
- metadata +132 -0
Binary file
|
@@ -0,0 +1,7 @@
|
|
1
|
+
## version = 0.2
|
2
|
+
## Added timestamp to function addLinguisitcProcessor
|
3
|
+
## 24-april-2013 --> getSingleEntieies and getSingleProperties reads both entities/props in format
|
4
|
+
## entities -> entity -> span -> target and entities -> entity -> references -> span
|
5
|
+
####
|
6
|
+
|
7
|
+
from KafParserMod import KafParser
|
Binary file
|
@@ -0,0 +1,80 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
|
3
|
+
|
4
|
+
#####
|
5
|
+
# 4-Mar-2013 : modified order of rules to check first if there is a merge and then if it is an extra token
|
6
|
+
# because of this case, which can be both: [ .. . ] [ . . . ]
|
7
|
+
|
8
|
+
|
9
|
+
def add_match(d,id_new,id_ref):
    """Record that the new token ``id_new`` aligns with reference ``id_ref``.

    ``d`` maps each new-token id to the list of reference-token ids matched
    to it so far. The list is created on first use and ``id_ref`` is appended.
    """
    d.setdefault(id_new, []).append(id_ref)


def token_matcher(l_ref,l_new,super_d):
    """Align two tokenizations of the same text and fill ``super_d``.

    ``l_ref`` and ``l_new`` are sequences of ``(token, id)`` pairs. For every
    pair of tokens that is considered aligned, the reference id is appended to
    ``super_d[new_id]`` (see ``add_match``). Nothing is returned.

    Rules, tried in this order for the current pair of tokens:

    1. identical tokens            -> match, advance both sides
    2. reference starts with new   -> the reference token was split; match and
                                      keep the unmatched rest of the reference
    3. new starts with reference   -> reference tokens were merged; match and
                                      keep the unmatched rest of the new token
    4. next new token == reference -> extra token in ``l_new``, skip it
    5. next reference == new token -> extra token in ``l_ref``, skip it
    6. otherwise                   -> impossible match, skip both

    The split/merge rules are checked before the extra-token rules because
    a pair can satisfy both, e.g. [ .. . ] against [ . . . ].

    The walk is iterative and index based, so it does not hit the recursion
    limit on long sentences, stops cleanly when either side is exhausted
    (previously an exhausted ``l_ref`` raised IndexError) and leaves the
    caller's lists untouched.
    """
    idx_ref = 0
    idx_new = 0
    total_ref = len(l_ref)
    total_new = len(l_new)

    # Unmatched tail of the current token after a split/merge, or None when
    # the current token has not been partially consumed.
    rest_ref = None
    rest_new = None

    while idx_new < total_new and idx_ref < total_ref:
        token_ref, id_ref = l_ref[idx_ref]
        token_new, id_new = l_new[idx_new]
        if rest_ref is not None:
            token_ref = rest_ref
        if rest_new is not None:
            token_new = rest_new

        if token_ref == token_new:
            add_match(super_d,id_new,id_ref)
            idx_ref += 1
            idx_new += 1
            rest_ref = None
            rest_new = None

        elif token_ref.startswith(token_new):  ## There was a split
            rest_ref = token_ref[len(token_new):]
            add_match(super_d,id_new,id_ref)
            idx_new += 1
            rest_new = None

        elif token_new.startswith(token_ref):  ## There was a merge
            rest_new = token_new[len(token_ref):]
            add_match(super_d,id_new,id_ref)
            idx_ref += 1
            rest_ref = None

        elif idx_new + 1 < total_new and l_new[idx_new + 1][0] == token_ref:
            ## There is an extra token in l_new
            idx_new += 1
            rest_new = None

        elif idx_ref + 1 < total_ref and l_ref[idx_ref + 1][0] == token_new:
            ## There is an extra token in l_ref
            idx_ref += 1
            rest_ref = None

        else:  ## Impossible matching: give up on both tokens
            idx_ref += 1
            idx_new += 1
            rest_ref = None
            rest_new = None
|
60
|
+
|
61
|
+
|
62
|
+
if __name__ == '__main__':
    # Manual smoke test: align two tokenizations of the same sentence that
    # differ by one merge ("Th is" / "This") and one split ("simple" /
    # "sim ple"), then dump both token lists and the resulting mapping.

    #s1 = 'Beatrix Wilhelmina Armgard van Oranje -Nassau (Baarn , 31 januari 1938 ) is sinds 30 april 1980 koningin van het Koninkrijk der Nederlanden'
    s1 = 'Th is is a very simple example'
    l1 = [(t, 'id' + str(n)) for n, t in enumerate(s1.split(' '))]

    #s2 = 'Beatrix Wilhelmina Armgard van Oranje -Nassau ( Baarn , 31 januari 1938 ) is sinds 30 april 1980 koningin van het Koninkrijk der Nederlanden'
    s2 = 'This is a very sim ple example'
    l2 = [(t, 'id' + str(n)) for n, t in enumerate(s2.split(' '))]

    super_d = {}
    token_matcher(l1,l2,super_d)

    # Single-argument print() produces the same output on Python 2 and 3.
    print(l1)
    print(l2)
    print(super_d)
|
data/ext/hack/support.rb
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
require 'opener/build-tools'
|
2
|
+
|
3
|
+
include Opener::BuildTools::Requirements
|
4
|
+
include Opener::BuildTools::Python
|
5
|
+
include Opener::BuildTools::Files
|
6
|
+
|
7
|
+
# All paths below are resolved against the directory three levels above this
# file's path.
gem_root = File.expand_path('../../..', __FILE__)

# Directory where packages will be installed to.
PYTHON_SITE_PACKAGES = File.expand_path('core/site-packages', gem_root)

# Directory containing the temporary files.
TMP_DIRECTORY = File.expand_path('tmp', gem_root)

# Path to the pip requirements file used to install requirements before
# packaging the Gem.
PRE_BUILD_REQUIREMENTS = File.expand_path('pre_build_requirements.txt', gem_root)

# Path to the pip requirements file used to install requirements upon Gem
# installation.
PRE_INSTALL_REQUIREMENTS = File.expand_path('pre_install_requirements.txt', gem_root)
|
29
|
+
|
30
|
+
##
|
31
|
+
# Verifies the requirements to install this Gem.
|
32
|
+
#
|
33
|
+
def verify_requirements
  # Each entry: executable name, minimum version, and a lazy lookup of the
  # installed version. The lookup is deferred so that it only runs after the
  # executable itself has been checked, exactly as in a straight-line version.
  [
    ['python', '2.6.0', lambda { python_version }],
    ['pip',    '1.3.1', lambda { pip_version }]
  ].each do |executable, minimum, installed|
    require_executable(executable)
    require_version(executable, installed.call, minimum)
  end
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
require 'open3'
|
2
|
+
require_relative 'base/version'
|
3
|
+
|
4
|
+
module Opener
  module POSTaggers
    ##
    # The base POS tagger that supports Dutch and German.
    #
    # @!attribute [r] args
    #  @return [Array]
    # @!attribute [r] options
    #  @return [Hash]
    #
    class Base
      attr_reader :args, :options

      ##
      # @param [Hash] options
      #
      # @option options [Array] :args The commandline arguments to pass to the
      #  underlying Python script.
      #
      def initialize(options = {})
        # Work on a copy so that removing :args does not alter the Hash the
        # caller passed in.
        options  = options.dup
        @args    = options.delete(:args) || []
        @options = options
      end

      ##
      # Builds the command used to execute the kernel.
      #
      # @return [String]
      #
      def command
        return "#{adjust_python_path} python -E -OO #{kernel} #{args.join(' ')}"
      end

      ##
      # Runs the command and returns the output of STDOUT, STDERR and the
      # process information.
      #
      # @param [String] input The input to tag.
      # @return [Array]
      #
      def run(input)
        return capture(input)
      end

      protected

      ##
      # Returns the `env` prefix that prepends the bundled site-packages
      # directory to PYTHONPATH.
      #
      # NOTE(review): `command` passes `-E` to python, which per the Python
      # documentation ignores PYTHON* environment variables - confirm that
      # this prefix has the intended effect.
      #
      # @return [String]
      #
      def adjust_python_path
        site_packages = File.join(core_dir, 'site-packages')
        "env PYTHONPATH=#{site_packages}:$PYTHONPATH"
      end

      ##
      # capture3 method doesn't work properly with Jruby, so
      # this is a workaround
      #
      # Writes `input` to the process' STDIN and reads STDOUT and STDERR in
      # separate threads while doing so.
      #
      # NOTE(review): the command String is split on single spaces and handed
      # to popen3 as separate arguments. Installation paths or arguments that
      # contain spaces are therefore broken up, and since no shell is involved
      # "$PYTHONPATH" is presumably passed on literally - verify.
      #
      # @param [String] input
      # @return [Array] STDOUT, STDERR and the process status.
      #
      def capture(input)
        Open3.popen3(*command.split(" ")) {|i, o, e, t|
          out_reader = Thread.new { o.read }
          err_reader = Thread.new { e.read }
          i.write input
          i.close
          [out_reader.value, err_reader.value, t.value]
        }
      end

      ##
      # Directory named "core", resolved four levels above this file's path.
      #
      # @return [String]
      #
      def core_dir
        return File.expand_path('../../../../core', __FILE__)
      end

      ##
      # Path to the Python script that performs the tagging.
      #
      # @return [String]
      #
      def kernel
        return File.join(core_dir, 'pos-tagger_open-nlp.py')
      end
    end # Base

    class DE < Base
    end # DE

  end # POSTaggers
end # Opener
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require File.expand_path('../lib/opener/pos_taggers/base/version', __FILE__)
|
2
|
+
|
3
|
+
Gem::Specification.new do |spec|
  # Glob patterns for everything that is shipped with the Gem.
  packaged_patterns = [
    'core/**/*',
    'ext/**/*',
    'lib/**/*',
    '*.gemspec',
    '*_requirements.txt',
    'README.md'
  ]

  spec.name        = 'opener-pos-tagger-base'
  spec.version     = Opener::POSTaggers::Base::VERSION
  spec.authors     = ['development@olery.com']
  spec.summary     = 'POS tagger for Dutch and German using OpenNLP'
  spec.description = spec.summary
  spec.homepage    = 'http://opener-project.github.com/'

  spec.required_ruby_version = '>= 1.9.2'

  # Only regular files are packaged; directories matched by the globs are
  # dropped.
  spec.files = Dir.glob(packaged_patterns).select { |path| File.file?(path) }

  # Every file in bin/ is exposed as an executable, by its base name.
  spec.executables = Dir.glob('bin/*').map { |path| File.basename(path) }

  # Runtime dependencies.
  spec.add_dependency 'opener-build-tools', ['>= 0.2.7']
  spec.add_dependency 'rake'

  # Development-only dependencies.
  ['rspec', 'cucumber'].each do |name|
    spec.add_development_dependency name
  end
end
|
@@ -0,0 +1 @@
|
|
1
|
+
git+ssh://git@github.com/opener-project/VU-kaf-parser.git#egg=VUKafParserPy
|
metadata
ADDED
@@ -0,0 +1,132 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: opener-pos-tagger-base
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 2.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- development@olery.com
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-05-20 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: opener-build-tools
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 0.2.7
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 0.2.7
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: cucumber
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
description: POS tagger for Dutch and German using OpenNLP
|
70
|
+
email:
|
71
|
+
executables:
|
72
|
+
- pos-tagger-base
|
73
|
+
extensions: []
|
74
|
+
extra_rdoc_files: []
|
75
|
+
files:
|
76
|
+
- README.md
|
77
|
+
- bin/pos-tagger-base
|
78
|
+
- core/mapping.postag.stss.to.opener.csv
|
79
|
+
- core/mapping.postag.wotan.to.opener.csv
|
80
|
+
- core/opennlp/bin/opennlp
|
81
|
+
- core/opennlp/bin/opennlp.bat
|
82
|
+
- core/opennlp/lib/jwnl-1.3.3.jar
|
83
|
+
- core/opennlp/lib/opennlp-maxent-3.0.2-incubating.jar
|
84
|
+
- core/opennlp/lib/opennlp-tools-1.5.2-incubating.jar
|
85
|
+
- core/opennlp/lib/opennlp-uima-1.5.2-incubating.jar
|
86
|
+
- core/opennlp/models/de-pos-maxent.bin
|
87
|
+
- core/opennlp/models/de-pos-perceptron.bin
|
88
|
+
- core/opennlp/models/nl-pos-maxent.bin
|
89
|
+
- core/opennlp/models/nl-pos-perceptron.bin
|
90
|
+
- core/pos-tagger_open-nlp.py
|
91
|
+
- core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/PKG-INFO
|
92
|
+
- core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/SOURCES.txt
|
93
|
+
- core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/dependency_links.txt
|
94
|
+
- core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/installed-files.txt
|
95
|
+
- core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/top_level.txt
|
96
|
+
- core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.py
|
97
|
+
- core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.pyc
|
98
|
+
- core/site-packages/pre_build/VUKafParserPy/KafParserMod.py
|
99
|
+
- core/site-packages/pre_build/VUKafParserPy/KafParserMod.pyc
|
100
|
+
- core/site-packages/pre_build/VUKafParserPy/__init__.py
|
101
|
+
- core/site-packages/pre_build/VUKafParserPy/__init__.pyc
|
102
|
+
- core/token_matcher.py
|
103
|
+
- ext/hack/support.rb
|
104
|
+
- lib/opener/pos_taggers/base.rb
|
105
|
+
- lib/opener/pos_taggers/base/version.rb
|
106
|
+
- opener-pos-tagger-base.gemspec
|
107
|
+
- pre_build_requirements.txt
|
108
|
+
homepage: http://opener-project.github.com/
|
109
|
+
licenses: []
|
110
|
+
metadata: {}
|
111
|
+
post_install_message:
|
112
|
+
rdoc_options: []
|
113
|
+
require_paths:
|
114
|
+
- lib
|
115
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
116
|
+
requirements:
|
117
|
+
- - ">="
|
118
|
+
- !ruby/object:Gem::Version
|
119
|
+
version: 1.9.2
|
120
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - ">="
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0'
|
125
|
+
requirements: []
|
126
|
+
rubyforge_project:
|
127
|
+
rubygems_version: 2.2.2
|
128
|
+
signing_key:
|
129
|
+
specification_version: 4
|
130
|
+
summary: POS tagger for Dutch and German using OpenNLP
|
131
|
+
test_files: []
|
132
|
+
has_rdoc:
|