opener-pos-tagger-base 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +110 -0
  3. data/bin/pos-tagger-base +21 -0
  4. data/core/mapping.postag.stss.to.opener.csv +52 -0
  5. data/core/mapping.postag.wotan.to.opener.csv +13 -0
  6. data/core/opennlp/bin/opennlp +35 -0
  7. data/core/opennlp/bin/opennlp.bat +35 -0
  8. data/core/opennlp/lib/jwnl-1.3.3.jar +0 -0
  9. data/core/opennlp/lib/opennlp-maxent-3.0.2-incubating.jar +0 -0
  10. data/core/opennlp/lib/opennlp-tools-1.5.2-incubating.jar +0 -0
  11. data/core/opennlp/lib/opennlp-uima-1.5.2-incubating.jar +0 -0
  12. data/core/opennlp/models/de-pos-maxent.bin +0 -0
  13. data/core/opennlp/models/de-pos-perceptron.bin +0 -0
  14. data/core/opennlp/models/nl-pos-maxent.bin +0 -0
  15. data/core/opennlp/models/nl-pos-perceptron.bin +0 -0
  16. data/core/pos-tagger_open-nlp.py +160 -0
  17. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/PKG-INFO +10 -0
  18. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/SOURCES.txt +7 -0
  19. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/dependency_links.txt +1 -0
  20. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/installed-files.txt +11 -0
  21. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/top_level.txt +1 -0
  22. data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.py +165 -0
  23. data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.pyc +0 -0
  24. data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.py +439 -0
  25. data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.pyc +0 -0
  26. data/core/site-packages/pre_build/VUKafParserPy/__init__.py +7 -0
  27. data/core/site-packages/pre_build/VUKafParserPy/__init__.pyc +0 -0
  28. data/core/token_matcher.py +80 -0
  29. data/ext/hack/support.rb +38 -0
  30. data/lib/opener/pos_taggers/base.rb +90 -0
  31. data/lib/opener/pos_taggers/base/version.rb +7 -0
  32. data/opener-pos-tagger-base.gemspec +29 -0
  33. data/pre_build_requirements.txt +1 -0
  34. metadata +132 -0
@@ -0,0 +1,7 @@
1
+ ## version = 0.2
2
+ ## Added timestamp to function addLinguisitcProcessor
3
+ ## 24-april-2013 --> getSingleEntieies and getSingleProperties reads both entities/props in format
4
+ ## entities -> entity -> span -> target and entities -> entity -> references -> span
5
+ ####
6
+
7
+ from KafParserMod import KafParser
@@ -0,0 +1,80 @@
1
+ #!/usr/bin/env python
2
+
3
+
4
+ #####
5
+ # 4-Mar-2013 : modified order of rules to check first if there is a merge and then if it is an extra token
6
+ # because of this case, where it can be both: [ .. . ] [ . . . ]
7
+
8
+
9
def add_match(d, id_new, id_ref):
    """Record that the new token ``id_new`` was matched to ``id_ref``.

    ``d`` maps each new-token id to the list of reference-token ids matched
    to it, in the order the matches were found.  The mapping is updated in
    place and nothing is returned.
    """
    # setdefault creates the list on first use, so a single lookup covers
    # both the "first match" and the "additional match" case.
    d.setdefault(id_new, []).append(id_ref)
14
+
15
+
16
def token_matcher(l_ref, l_new, super_d):
    """Align two tokenisations of the same text and record the matches.

    ``l_ref`` and ``l_new`` are sequences of ``(token, token_id)`` pairs.
    For every aligned pair the reference id is appended to the list stored
    under ``super_d[id_new]``.  ``super_d`` is updated in place and nothing
    is returned.

    At each step the following rules are tried, in this order:

    1. The tokens are identical: match them and advance both sides.
    2. The reference token starts with the new token (the new side split
       it): match, keep the unmatched remainder of the reference token and
       advance the new side.
    3. The new token starts with the reference token (the new side merged
       several reference tokens): match, keep the unmatched remainder of
       the new token and advance the reference side.
    4. The next new token equals the reference token: the current new
       token is extra, skip it.
    5. The next reference token equals the new token: the current
       reference token is extra, skip it.
    6. Otherwise no match is possible: skip both tokens.

    Split and merge are deliberately checked before the "extra token"
    rules because a pair of tokens can satisfy both.

    The alignment is done with a loop and two cursors instead of recursion
    and list slicing, so long inputs do not hit the recursion limit and no
    list copies are made.  The input lists are never modified.  Matching
    stops as soon as either list is exhausted.
    """
    n_ref = len(l_ref)
    n_new = len(l_new)
    ref_pos = 0
    new_pos = 0
    # Unmatched tail of the token at the current position after a split or
    # merge, or None when that token has not been partially consumed.
    ref_rest = None
    new_rest = None

    while new_pos < n_new and ref_pos < n_ref:
        token_ref, id_ref = l_ref[ref_pos]
        token_new, id_new = l_new[new_pos]
        if ref_rest is not None:
            token_ref = ref_rest
        if new_rest is not None:
            token_new = new_rest

        if token_ref == token_new:
            super_d.setdefault(id_new, []).append(id_ref)
            ref_pos += 1
            new_pos += 1
            ref_rest = new_rest = None
        elif token_ref.startswith(token_new):
            # There was a split: the reference token covers several new ones.
            super_d.setdefault(id_new, []).append(id_ref)
            ref_rest = token_ref[len(token_new):]
            new_pos += 1
            new_rest = None
        elif token_new.startswith(token_ref):
            # There was a merge: the new token covers several reference ones.
            super_d.setdefault(id_new, []).append(id_ref)
            new_rest = token_new[len(token_ref):]
            ref_pos += 1
            ref_rest = None
        elif new_pos + 1 < n_new and l_new[new_pos + 1][0] == token_ref:
            # There is an extra token in l_new; the reference side, including
            # any pending remainder, is left untouched.
            new_pos += 1
            new_rest = None
        elif ref_pos + 1 < n_ref and l_ref[ref_pos + 1][0] == token_new:
            # There is an extra token in l_ref.
            ref_pos += 1
            ref_rest = None
        else:
            # Impossible matching: give up on both tokens and move on.
            ref_pos += 1
            new_pos += 1
            ref_rest = new_rest = None
60
+
61
+
62
if __name__ == '__main__':
    # Manual smoke test: align two tokenisations of the same sentence that
    # differ by one merge ("Th is" / "This") and one split ("simple" /
    # "sim ple"), then print both token lists and the resulting mapping.
    #s1 = 'Beatrix Wilhelmina Armgard van Oranje -Nassau (Baarn , 31 januari 1938 ) is sinds 30 april 1980 koningin van het Koninkrijk der Nederlanden'
    s1 = 'Th is is a very simple example'
    l1 = [(t, 'id' + str(n)) for n, t in enumerate(s1.split(' '))]

    #s2 = 'Beatrix Wilhelmina Armgard van Oranje -Nassau ( Baarn , 31 januari 1938 ) is sinds 30 april 1980 koningin van het Koninkrijk der Nederlanden'
    s2 = 'This is a very sim ple example'
    l2 = [(t, 'id' + str(n)) for n, t in enumerate(s2.split(' '))]

    super_d = {}
    token_matcher(l1, l2, super_d)
    # print(x) with a single argument produces the same output on Python 2
    # and Python 3, so the module no longer fails to compile on Python 3.
    print(l1)
    print(l2)
    print(super_d)
@@ -0,0 +1,38 @@
1
+ require 'opener/build-tools'
2
+
3
+ include Opener::BuildTools::Requirements
4
+ include Opener::BuildTools::Python
5
+ include Opener::BuildTools::Files
6
+
7
# All paths below are resolved relative to this file by going three levels
# up from it.

# Directory where packages will be installed to.
PYTHON_SITE_PACKAGES = File.expand_path(
  '../../../core/site-packages',
  __FILE__
)

# Directory containing the temporary files.
TMP_DIRECTORY = File.expand_path('../../../tmp', __FILE__)

# Path to the pip requirements file used to install requirements before
# packaging the Gem.
PRE_BUILD_REQUIREMENTS = File.expand_path(
  '../../../pre_build_requirements.txt',
  __FILE__
)

# Path to the pip requirements file used to install requirements upon Gem
# installation.
# NOTE(review): the packaged file list only contains
# pre_build_requirements.txt; confirm that pre_install_requirements.txt is
# expected to exist at this location.
PRE_INSTALL_REQUIREMENTS = File.expand_path(
  '../../../pre_install_requirements.txt',
  __FILE__
)
29
+
30
##
# Verifies the requirements to install this Gem.
#
# Checks for the `python` and `pip` executables and passes their detected
# versions, together with '2.6.0' and '1.3.1' respectively, to
# `require_version`.
# NOTE(review): the version strings are presumably minimum versions and the
# helpers presumably abort on failure; confirm against opener-build-tools.
#
def verify_requirements
  require_executable('python')
  require_version('python', python_version, '2.6.0')
  require_executable('pip')
  require_version('pip', pip_version, '1.3.1')
end
@@ -0,0 +1,90 @@
1
+ require 'open3'
2
+ require_relative 'base/version'
3
+
4
module Opener
  module POSTaggers
    ##
    # The base POS tagger that supports Dutch and German.
    #
    # The actual tagging is done by a Python script (see {#kernel}) that is
    # started as a child process; the input is written to its STDIN.
    #
    # @!attribute [r] args
    #  @return [Array]
    # @!attribute [r] options
    #  @return [Hash]
    #
    class Base
      attr_reader :args, :options

      ##
      # Note that the `:args` key is removed from the Hash that is passed
      # in, so the caller's Hash is modified.
      #
      # @param [Hash] options
      #
      # @option options [Array] :args The commandline arguments to pass to the
      #  underlying Python script.
      #
      def initialize(options = {})
        @args = options.delete(:args) || []
        @options = options
      end

      ##
      # Builds the command used to execute the kernel.
      #
      # The arguments are joined with single spaces and are not escaped.
      #
      # @return [String]
      #
      def command
        return "#{adjust_python_path} python -E -OO #{kernel} #{args.join(' ')}"
      end

      ##
      # Runs the command and returns the output of STDOUT, STDERR and the
      # process information.
      #
      # @param [String] input The input to tag.
      # @return [Array] STDOUT, STDERR and the value of the wait thread, in
      #  that order.
      #
      def run(input)
        return capture(input)
      end

      protected
      ##
      # Returns the `env` prefix that puts the bundled site-packages
      # directory on PYTHONPATH.
      #
      # NOTE(review): {#capture} splits the command on spaces and hands the
      # pieces to Open3.popen3 as separate arguments, which presumably
      # bypasses the shell, so "$PYTHONPATH" would reach `env` as a literal
      # string instead of being expanded. {#command} also runs `python -E`,
      # which is documented to ignore PYTHON* environment variables.
      # Confirm whether this prefix has any effect.
      #
      # @return [String]
      #
      def adjust_python_path
        site_packages = File.join(core_dir, 'site-packages')
        "env PYTHONPATH=#{site_packages}:$PYTHONPATH"
      end

      ##
      # capture3 method doesn't work properly with Jruby, so
      # this is a workaround
      #
      # The reader threads are started before the input is written, so
      # STDOUT and STDERR are read while the child process runs.
      #
      # NOTE(review): the command is split on single spaces, so an install
      # path or an argument that contains a space ends up as several
      # arguments.
      #
      # @param [String] input The text written to the STDIN of the process.
      # @return [Array] STDOUT, STDERR and the value of the wait thread.
      #
      def capture(input)
        Open3.popen3(*command.split(" ")) {|i, o, e, t|
          out_reader = Thread.new { o.read }
          err_reader = Thread.new { e.read }
          i.write input
          i.close
          [out_reader.value, err_reader.value, t.value]
        }
      end

      ##
      # Absolute path of the `core` directory, resolved relative to this
      # file.
      #
      # @return [String]
      #
      def core_dir
        return File.expand_path('../../../../core', __FILE__)
      end

      ##
      # Path of the Python script that performs the tagging.
      #
      # @return [String]
      #
      def kernel
        return File.join(core_dir, 'pos-tagger_open-nlp.py')
      end
    end # Base

    # Subclass of Base that adds no behaviour of its own.
    class DE < Base
    end # DE

  end # POSTaggers
end # Opener
@@ -0,0 +1,7 @@
1
module Opener
  module POSTaggers
    class Base
      # Version of the opener-pos-tagger-base Gem; the gemspec reads this
      # constant for `gem.version`.
      VERSION = '2.0.0'
    end # Base
  end # POSTaggers
end # Opener
@@ -0,0 +1,29 @@
1
+ require File.expand_path('../lib/opener/pos_taggers/base/version', __FILE__)
2
+
3
# Gem specification for opener-pos-tagger-base.
Gem::Specification.new do |gem|
  gem.name = 'opener-pos-tagger-base'
  gem.version = Opener::POSTaggers::Base::VERSION
  # NOTE(review): this holds an e-mail address rather than author names and
  # no `gem.email` is set; confirm this is intended.
  gem.authors = ['development@olery.com']
  gem.summary = 'POS tagger for Dutch and German using OpenNLP'
  gem.description = gem.summary
  gem.homepage = 'http://opener-project.github.com/'

  gem.required_ruby_version = '>= 1.9.2'

  # Package everything under core/, ext/ and lib/ plus the gemspec, the pip
  # requirements files and the README; directories matched by the globs are
  # filtered out so only regular files are listed.
  gem.files = Dir.glob([
    'core/**/*',
    'ext/**/*',
    'lib/**/*',
    '*.gemspec',
    '*_requirements.txt',
    'README.md'
  ]).select { |file| File.file?(file) }

  # Every file directly under bin/ is exposed as an executable, by basename.
  gem.executables = Dir.glob('bin/*').map { |file| File.basename(file) }

  gem.add_dependency 'opener-build-tools', ['>= 0.2.7']
  gem.add_dependency 'rake'

  gem.add_development_dependency 'rspec'
  gem.add_development_dependency 'cucumber'
end
@@ -0,0 +1 @@
1
+ git+ssh://git@github.com/opener-project/VU-kaf-parser.git#egg=VUKafParserPy
metadata ADDED
@@ -0,0 +1,132 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: opener-pos-tagger-base
3
+ version: !ruby/object:Gem::Version
4
+ version: 2.0.0
5
+ platform: ruby
6
+ authors:
7
+ - development@olery.com
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-05-20 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: opener-build-tools
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 0.2.7
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 0.2.7
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: cucumber
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ description: POS tagger for Dutch and German using OpenNLP
70
+ email:
71
+ executables:
72
+ - pos-tagger-base
73
+ extensions: []
74
+ extra_rdoc_files: []
75
+ files:
76
+ - README.md
77
+ - bin/pos-tagger-base
78
+ - core/mapping.postag.stss.to.opener.csv
79
+ - core/mapping.postag.wotan.to.opener.csv
80
+ - core/opennlp/bin/opennlp
81
+ - core/opennlp/bin/opennlp.bat
82
+ - core/opennlp/lib/jwnl-1.3.3.jar
83
+ - core/opennlp/lib/opennlp-maxent-3.0.2-incubating.jar
84
+ - core/opennlp/lib/opennlp-tools-1.5.2-incubating.jar
85
+ - core/opennlp/lib/opennlp-uima-1.5.2-incubating.jar
86
+ - core/opennlp/models/de-pos-maxent.bin
87
+ - core/opennlp/models/de-pos-perceptron.bin
88
+ - core/opennlp/models/nl-pos-maxent.bin
89
+ - core/opennlp/models/nl-pos-perceptron.bin
90
+ - core/pos-tagger_open-nlp.py
91
+ - core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/PKG-INFO
92
+ - core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/SOURCES.txt
93
+ - core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/dependency_links.txt
94
+ - core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/installed-files.txt
95
+ - core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/top_level.txt
96
+ - core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.py
97
+ - core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.pyc
98
+ - core/site-packages/pre_build/VUKafParserPy/KafParserMod.py
99
+ - core/site-packages/pre_build/VUKafParserPy/KafParserMod.pyc
100
+ - core/site-packages/pre_build/VUKafParserPy/__init__.py
101
+ - core/site-packages/pre_build/VUKafParserPy/__init__.pyc
102
+ - core/token_matcher.py
103
+ - ext/hack/support.rb
104
+ - lib/opener/pos_taggers/base.rb
105
+ - lib/opener/pos_taggers/base/version.rb
106
+ - opener-pos-tagger-base.gemspec
107
+ - pre_build_requirements.txt
108
+ homepage: http://opener-project.github.com/
109
+ licenses: []
110
+ metadata: {}
111
+ post_install_message:
112
+ rdoc_options: []
113
+ require_paths:
114
+ - lib
115
+ required_ruby_version: !ruby/object:Gem::Requirement
116
+ requirements:
117
+ - - ">="
118
+ - !ruby/object:Gem::Version
119
+ version: 1.9.2
120
+ required_rubygems_version: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ requirements: []
126
+ rubyforge_project:
127
+ rubygems_version: 2.2.2
128
+ signing_key:
129
+ specification_version: 4
130
+ summary: POS tagger for Dutch and German using OpenNLP
131
+ test_files: []
132
+ has_rdoc: