opener-pos-tagger-base 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (34) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +110 -0
  3. data/bin/pos-tagger-base +21 -0
  4. data/core/mapping.postag.stss.to.opener.csv +52 -0
  5. data/core/mapping.postag.wotan.to.opener.csv +13 -0
  6. data/core/opennlp/bin/opennlp +35 -0
  7. data/core/opennlp/bin/opennlp.bat +35 -0
  8. data/core/opennlp/lib/jwnl-1.3.3.jar +0 -0
  9. data/core/opennlp/lib/opennlp-maxent-3.0.2-incubating.jar +0 -0
  10. data/core/opennlp/lib/opennlp-tools-1.5.2-incubating.jar +0 -0
  11. data/core/opennlp/lib/opennlp-uima-1.5.2-incubating.jar +0 -0
  12. data/core/opennlp/models/de-pos-maxent.bin +0 -0
  13. data/core/opennlp/models/de-pos-perceptron.bin +0 -0
  14. data/core/opennlp/models/nl-pos-maxent.bin +0 -0
  15. data/core/opennlp/models/nl-pos-perceptron.bin +0 -0
  16. data/core/pos-tagger_open-nlp.py +160 -0
  17. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/PKG-INFO +10 -0
  18. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/SOURCES.txt +7 -0
  19. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/dependency_links.txt +1 -0
  20. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/installed-files.txt +11 -0
  21. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/top_level.txt +1 -0
  22. data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.py +165 -0
  23. data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.pyc +0 -0
  24. data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.py +439 -0
  25. data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.pyc +0 -0
  26. data/core/site-packages/pre_build/VUKafParserPy/__init__.py +7 -0
  27. data/core/site-packages/pre_build/VUKafParserPy/__init__.pyc +0 -0
  28. data/core/token_matcher.py +80 -0
  29. data/ext/hack/support.rb +38 -0
  30. data/lib/opener/pos_taggers/base.rb +90 -0
  31. data/lib/opener/pos_taggers/base/version.rb +7 -0
  32. data/opener-pos-tagger-base.gemspec +29 -0
  33. data/pre_build_requirements.txt +1 -0
  34. metadata +132 -0
@@ -0,0 +1,7 @@
1
+ ## version = 0.2
2
+ ## Added timestamp to function addLinguisitcProcessor
3
+ ## 24-april-2013 --> getSingleEntieies and getSingleProperties reads both entities/props in format
4
+ ## entities -> entity -> span -> target and entities -> entity -> references -> span
5
+ ####
6
+
7
+ from KafParserMod import KafParser
@@ -0,0 +1,80 @@
1
+ #!/usr/bin/env python
2
+
3
+
4
+ #####
5
+ # 4-Mar-2013 : modified order of rules to check first if there is a merge and then if it is an extra token
6
+ # because of this case, which can be both: [ .. . ] [ . . . ]
7
+
8
+
9
def add_match(d, id_new, id_ref):
    """Record that the new token ``id_new`` corresponds to ``id_ref``.

    ``d`` maps each new-token id to the list of reference-token ids matched
    to it. The list is created on first use and ``id_ref`` is appended to
    it. ``d`` is modified in place; nothing is returned.
    """
    # setdefault does the lookup and the first-time insertion in one step.
    d.setdefault(id_new, []).append(id_ref)
14
+
15
+
16
def token_matcher(l_ref, l_new, super_d):
    """Align two tokenizations of the same text and record matching ids.

    ``l_ref`` and ``l_new`` are sequences of ``(token, id)`` pairs. Both are
    walked from left to right; every time a new token is matched (fully or
    partially) to a reference token, the reference id is appended to
    ``super_d[id_new]``. ``super_d`` is filled in place and nothing is
    returned.

    Rules, tried in this order for the current pair of tokens:

    1. identical tokens: match, advance both sides;
    2. the reference token starts with the new token (the reference token
       was split in ``l_new``): match, keep the unmatched tail of the
       reference token, advance the new side;
    3. the new token starts with the reference token (reference tokens were
       merged in ``l_new``): match, keep the unmatched tail of the new
       token, advance the reference side;
    4. the *next* new token equals the reference token: the current new
       token is extra, skip it;
    5. the *next* reference token equals the new token: the current
       reference token is extra, skip it;
    6. otherwise the pair cannot be matched: skip both.

    The walk stops as soon as either sequence is exhausted. The input
    sequences are only read, never modified, and the walk is iterative so
    long inputs do not hit the interpreter's recursion limit.
    """
    ref_len = len(l_ref)
    new_len = len(l_new)
    ref_pos = 0
    new_pos = 0
    # Unmatched tail of the current token once a split/merge has consumed
    # its head; None means the whole original token is still pending.
    ref_rest = None
    new_rest = None

    while ref_pos < ref_len and new_pos < new_len:
        token_ref, id_ref = l_ref[ref_pos]
        token_new, id_new = l_new[new_pos]
        if ref_rest is not None:
            token_ref = ref_rest
        if new_rest is not None:
            token_new = new_rest

        advance_ref = False
        advance_new = False

        if token_ref == token_new:
            super_d.setdefault(id_new, []).append(id_ref)
            advance_ref = advance_new = True
        elif token_ref.startswith(token_new):
            # There was a split: the reference token continues in the next
            # new token(s).
            ref_rest = token_ref[len(token_new):]
            super_d.setdefault(id_new, []).append(id_ref)
            advance_new = True
        elif token_new.startswith(token_ref):
            # There was a merge: the new token also covers the next
            # reference token(s).
            new_rest = token_new[len(token_ref):]
            super_d.setdefault(id_new, []).append(id_ref)
            advance_ref = True
        elif new_pos + 1 < new_len and l_new[new_pos + 1][0] == token_ref:
            # There is an extra token in l_new.
            advance_new = True
        elif ref_pos + 1 < ref_len and l_ref[ref_pos + 1][0] == token_new:
            # There is an extra token in l_ref.
            advance_ref = True
        else:
            # Impossible matching: give up on this pair.
            advance_ref = advance_new = True

        # Moving past a token discards any pending tail it had.
        if advance_ref:
            ref_pos += 1
            ref_rest = None
        if advance_new:
            new_pos += 1
            new_rest = None
60
+
61
+
62
if __name__ == '__main__':
    # Manual demo: align two tokenizations of the same sentence and print
    # both token lists followed by the resulting id mapping.

    # Alternative, longer reference sentence:
    # s1 = 'Beatrix Wilhelmina Armgard van Oranje -Nassau (Baarn , 31 januari 1938 ) is sinds 30 april 1980 koningin van het Koninkrijk der Nederlanden'
    s1 = 'Th is is a very simple example'
    l1 = [(t, 'id' + str(n)) for n, t in enumerate(s1.split(' '))]

    #s2 = 'Beatrix Wilhelmina Armgard van Oranje -Nassau ( Baarn , 31 januari 1938 ) is sinds 30 april 1980 koningin van het Koninkrijk der Nederlanden'
    s2 = 'This is a very sim ple example'
    l2 = [(t, 'id' + str(n)) for n, t in enumerate(s2.split(' '))]

    super_d = {}
    token_matcher(l1, l2, super_d)
    # A parenthesised single-argument print works on both Python 2 and 3.
    print(l1)
    print(l2)
    print(super_d)
@@ -0,0 +1,38 @@
1
+ require 'opener/build-tools'
2
+
3
+ include Opener::BuildTools::Requirements
4
+ include Opener::BuildTools::Python
5
+ include Opener::BuildTools::Files
6
+
7
# Directory that Python packages are installed into.
PYTHON_SITE_PACKAGES = File.expand_path('../../../core/site-packages', __FILE__)

# Directory used for temporary files.
TMP_DIRECTORY = File.expand_path(
  '../../../tmp',
  __FILE__
)

# pip requirements file whose packages are installed before the Gem is
# packaged.
PRE_BUILD_REQUIREMENTS = File.expand_path('../../../pre_build_requirements.txt', __FILE__)

# pip requirements file whose packages are installed when the Gem itself is
# installed.
PRE_INSTALL_REQUIREMENTS = File.expand_path('../../../pre_install_requirements.txt', __FILE__)
29
+
30
##
# Verifies the requirements to install this Gem: for python and then pip,
# the executable is required first and its version is then checked against
# the minimum listed below.
#
def verify_requirements
  [
    ['python', :python_version, '2.6.0'],
    ['pip',    :pip_version,    '1.3.1']
  ].each do |executable, version_reader, minimum_version|
    require_executable(executable)
    # The version is only looked up after the executable check, as before.
    require_version(executable, send(version_reader), minimum_version)
  end
end
@@ -0,0 +1,90 @@
1
+ require 'open3'
2
+ require_relative 'base/version'
3
+
4
module Opener
  module POSTaggers
    ##
    # The base POS tagger that supports Dutch and German.
    #
    # @!attribute [r] args
    #  @return [Array]
    # @!attribute [r] options
    #  @return [Hash]
    #
    class Base
      attr_reader :args, :options

      ##
      # @param [Hash] options
      #
      # @option options [Array] :args The commandline arguments to pass to the
      #  underlying Python script.
      #
      def initialize(options = {})
        # Work on a copy so the caller's Hash keeps its :args entry.
        options  = options.dup
        @args    = options.delete(:args) || []
        @options = options
      end

      ##
      # Builds the command used to execute the kernel, as a single string
      # with the individual arguments joined by spaces.
      #
      # @return [String]
      #
      def command
        return command_arguments.join(' ')
      end

      ##
      # Runs the command and returns the output of STDOUT, STDERR and the
      # process information.
      #
      # @param [String] input The input to tag.
      # @return [Array]
      #
      def run(input)
        return capture(input)
      end

      protected

      ##
      # The command as a list of separate arguments. Executing this list
      # (instead of splitting the command string on spaces) keeps paths and
      # arguments that contain spaces intact.
      #
      # NOTE(review): Python's -E flag makes the interpreter ignore PYTHON*
      # environment variables, so the PYTHONPATH set here may have no effect
      # on the kernel - confirm whether -E is intended.
      #
      # @return [Array<String>]
      #
      def command_arguments
        return [
          'env',
          "PYTHONPATH=#{python_path}",
          'python',
          '-E',
          '-OO',
          kernel
        ] + args.map(&:to_s)
      end

      ##
      # @return [String]
      #
      def adjust_python_path
        return "env PYTHONPATH=#{python_path}"
      end

      ##
      # The PYTHONPATH value: the bundled site-packages directory followed by
      # the PYTHONPATH of the current process, if any. The value is resolved
      # in Ruby because the process is started without a shell, so a literal
      # "$PYTHONPATH" would never be expanded.
      #
      # @return [String]
      #
      def python_path
        site_packages = File.join(core_dir, 'site-packages')
        inherited     = ENV['PYTHONPATH']

        return site_packages if inherited.nil? || inherited.empty?

        return "#{site_packages}:#{inherited}"
      end

      ##
      # capture3 method doesn't work properly with Jruby, so
      # this is a workaround
      #
      # @param [String] input
      # @return [Array] STDOUT, STDERR and the process status.
      #
      def capture(input)
        Open3.popen3(*command_arguments) do |stdin, stdout, stderr, thread|
          # Both output streams are read on their own threads while the
          # input is being written.
          out_reader = Thread.new { stdout.read }
          err_reader = Thread.new { stderr.read }

          stdin.write(input)
          stdin.close

          [out_reader.value, err_reader.value, thread.value]
        end
      end

      ##
      # @return [String]
      #
      def core_dir
        return File.expand_path('../../../../core', __FILE__)
      end

      ##
      # @return [String]
      #
      def kernel
        return File.join(core_dir, 'pos-tagger_open-nlp.py')
      end
    end # Base

    class DE < Base
    end # DE

  end # POSTaggers
end # Opener
@@ -0,0 +1,7 @@
1
module Opener
  module POSTaggers
    class Base
      # Version of the opener-pos-tagger-base Gem.
      VERSION = '2.0.0'
    end
  end
end
@@ -0,0 +1,29 @@
1
require File.expand_path('../lib/opener/pos_taggers/base/version', __FILE__)

# Gem specification for opener-pos-tagger-base.
Gem::Specification.new do |spec|
  spec.name        = 'opener-pos-tagger-base'
  spec.version     = Opener::POSTaggers::Base::VERSION
  spec.authors     = ['development@olery.com']
  spec.summary     = 'POS tagger for Dutch and German using OpenNLP'
  spec.description = spec.summary
  spec.homepage    = 'http://opener-project.github.com/'

  spec.required_ruby_version = '>= 1.9.2'

  # Only regular files are packaged; directories matched by the globs are
  # filtered out.
  packaged_patterns = [
    'core/**/*',
    'ext/**/*',
    'lib/**/*',
    '*.gemspec',
    '*_requirements.txt',
    'README.md'
  ]

  spec.files = Dir.glob(packaged_patterns).select do |path|
    File.file?(path)
  end

  # Every entry in bin/ is exposed as an executable, by base name.
  spec.executables = Dir.glob('bin/*').map do |path|
    File.basename(path)
  end

  spec.add_dependency 'opener-build-tools', ['>= 0.2.7']
  spec.add_dependency 'rake'

  spec.add_development_dependency 'rspec'
  spec.add_development_dependency 'cucumber'
end
@@ -0,0 +1 @@
1
+ git+ssh://git@github.com/opener-project/VU-kaf-parser.git#egg=VUKafParserPy
metadata ADDED
@@ -0,0 +1,132 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: opener-pos-tagger-base
3
+ version: !ruby/object:Gem::Version
4
+ version: 2.0.0
5
+ platform: ruby
6
+ authors:
7
+ - development@olery.com
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-05-20 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: opener-build-tools
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 0.2.7
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 0.2.7
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: cucumber
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ description: POS tagger for Dutch and German using OpenNLP
70
+ email:
71
+ executables:
72
+ - pos-tagger-base
73
+ extensions: []
74
+ extra_rdoc_files: []
75
+ files:
76
+ - README.md
77
+ - bin/pos-tagger-base
78
+ - core/mapping.postag.stss.to.opener.csv
79
+ - core/mapping.postag.wotan.to.opener.csv
80
+ - core/opennlp/bin/opennlp
81
+ - core/opennlp/bin/opennlp.bat
82
+ - core/opennlp/lib/jwnl-1.3.3.jar
83
+ - core/opennlp/lib/opennlp-maxent-3.0.2-incubating.jar
84
+ - core/opennlp/lib/opennlp-tools-1.5.2-incubating.jar
85
+ - core/opennlp/lib/opennlp-uima-1.5.2-incubating.jar
86
+ - core/opennlp/models/de-pos-maxent.bin
87
+ - core/opennlp/models/de-pos-perceptron.bin
88
+ - core/opennlp/models/nl-pos-maxent.bin
89
+ - core/opennlp/models/nl-pos-perceptron.bin
90
+ - core/pos-tagger_open-nlp.py
91
+ - core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/PKG-INFO
92
+ - core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/SOURCES.txt
93
+ - core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/dependency_links.txt
94
+ - core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/installed-files.txt
95
+ - core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/top_level.txt
96
+ - core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.py
97
+ - core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.pyc
98
+ - core/site-packages/pre_build/VUKafParserPy/KafParserMod.py
99
+ - core/site-packages/pre_build/VUKafParserPy/KafParserMod.pyc
100
+ - core/site-packages/pre_build/VUKafParserPy/__init__.py
101
+ - core/site-packages/pre_build/VUKafParserPy/__init__.pyc
102
+ - core/token_matcher.py
103
+ - ext/hack/support.rb
104
+ - lib/opener/pos_taggers/base.rb
105
+ - lib/opener/pos_taggers/base/version.rb
106
+ - opener-pos-tagger-base.gemspec
107
+ - pre_build_requirements.txt
108
+ homepage: http://opener-project.github.com/
109
+ licenses: []
110
+ metadata: {}
111
+ post_install_message:
112
+ rdoc_options: []
113
+ require_paths:
114
+ - lib
115
+ required_ruby_version: !ruby/object:Gem::Requirement
116
+ requirements:
117
+ - - ">="
118
+ - !ruby/object:Gem::Version
119
+ version: 1.9.2
120
+ required_rubygems_version: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ requirements: []
126
+ rubyforge_project:
127
+ rubygems_version: 2.2.2
128
+ signing_key:
129
+ specification_version: 4
130
+ summary: POS tagger for Dutch and German using OpenNLP
131
+ test_files: []
132
+ has_rdoc: