opener-tokenizer-base 1.0.0
- checksums.yaml +7 -0
- data/README.md +148 -0
- data/bin/tokenizer-base +5 -0
- data/bin/tokenizer-de +5 -0
- data/bin/tokenizer-en +5 -0
- data/bin/tokenizer-es +5 -0
- data/bin/tokenizer-fr +5 -0
- data/bin/tokenizer-it +5 -0
- data/bin/tokenizer-nl +5 -0
- data/core/lib/Data/OptList.pm +256 -0
- data/core/lib/Params/Util.pm +866 -0
- data/core/lib/Sub/Exporter.pm +1101 -0
- data/core/lib/Sub/Exporter/Cookbook.pod +309 -0
- data/core/lib/Sub/Exporter/Tutorial.pod +280 -0
- data/core/lib/Sub/Exporter/Util.pm +354 -0
- data/core/lib/Sub/Install.pm +329 -0
- data/core/lib/Time/Stamp.pm +808 -0
- data/core/load-prefixes.pl +43 -0
- data/core/nonbreaking_prefixes/abbreviation_list.kaf +0 -0
- data/core/nonbreaking_prefixes/abbreviation_list.txt +444 -0
- data/core/nonbreaking_prefixes/nonbreaking_prefix.ca +533 -0
- data/core/nonbreaking_prefixes/nonbreaking_prefix.de +781 -0
- data/core/nonbreaking_prefixes/nonbreaking_prefix.el +448 -0
- data/core/nonbreaking_prefixes/nonbreaking_prefix.en +564 -0
- data/core/nonbreaking_prefixes/nonbreaking_prefix.es +758 -0
- data/core/nonbreaking_prefixes/nonbreaking_prefix.fr +1027 -0
- data/core/nonbreaking_prefixes/nonbreaking_prefix.is +697 -0
- data/core/nonbreaking_prefixes/nonbreaking_prefix.it +641 -0
- data/core/nonbreaking_prefixes/nonbreaking_prefix.nl +739 -0
- data/core/nonbreaking_prefixes/nonbreaking_prefix.pl +729 -0
- data/core/nonbreaking_prefixes/nonbreaking_prefix.pt +656 -0
- data/core/nonbreaking_prefixes/nonbreaking_prefix.ro +484 -0
- data/core/nonbreaking_prefixes/nonbreaking_prefix.ru +705 -0
- data/core/nonbreaking_prefixes/nonbreaking_prefix.sk +920 -0
- data/core/nonbreaking_prefixes/nonbreaking_prefix.sl +524 -0
- data/core/nonbreaking_prefixes/nonbreaking_prefix.sv +492 -0
- data/core/split-sentences.pl +114 -0
- data/core/text-fixer.pl +169 -0
- data/core/tokenizer-cli.pl +363 -0
- data/core/tokenizer.pl +145 -0
- data/lib/opener/tokenizers/base.rb +84 -0
- data/lib/opener/tokenizers/base/version.rb +8 -0
- data/opener-tokenizer-base.gemspec +25 -0
- metadata +134 -0
data/core/tokenizer.pl
ADDED
@@ -0,0 +1,145 @@
+#!/usr/bin/perl -w
+
+# This script tokenizes input sentences.
+# It implements the Moses tokenizer, modified
+# for OpeNER by Aitor García and Andoni Azpeitia.
+
+use FindBin;
+use utf8;
+
+my %NONBREAKING_PREFIX = ();
+my $LANGUAGE;
+my $SUBSTITUTE = "####";
+
+sub init_tokenizer {
+  $LANGUAGE = shift(@_);
+  %NONBREAKING_PREFIX = %{ shift(@_) };
+}
+
+sub tokenize {
+  my ($text) = shift(@_);
+  chomp($text);
+
+  #>>>>> OpeNER additions
+  # tokenize dashes at the beginning of a line
+  $text =~ s/^\-([^ ])/\- $1/g;
+
+  # turn the mis-encoded Ž into '
+  $text =~ s/Ž/\'/g;
+  #<<<<< end OpeNER additions
+
+  $text = " $text ";
+  # separate out all "other" special characters
+  $text =~ s/([^\p{IsAlnum}\s\.\'\`\,\-\’])/ $1 /g;
+  #$text =~ s/([^\p{IsAlnum}\s\.\'\`\,\-])/ $1 /g;
+
+  # multi-dots stay together
+  $text =~ s/\.([\.]+)/ DOTMULTI$1/g;
+  while ($text =~ /DOTMULTI\./) {
+    $text =~ s/DOTMULTI\.([^\.])/DOTDOTMULTI $1/g;
+    $text =~ s/DOTMULTI\./DOTDOTMULTI/g;
+  }
+
+  # separate out "," except if within numbers (5,300)
+  $text =~ s/([^\p{IsN}])[,]([^\p{IsN}])/$1 , $2/g;
+  # separate "," pre and post number
+  $text =~ s/([\p{IsN}])[,]([^\p{IsN}])/$1 , $2/g;
+  $text =~ s/([^\p{IsN}])[,]([\p{IsN}])/$1 , $2/g;
+
+  # turn ` into '
+  $text =~ s/\`/\'$SUBSTITUTE/g;
+
+  # turn '' into "
+  $text =~ s/\'\'/ \"$SUBSTITUTE /g;
+
+  #>>>>> OpeNER additions
+  # tokenize words like '05-'06
+  $text =~ s/(['’])([0-9][0-9])\-(['’])([0-9][0-9])/$1$2 - $3$4/g;
+  # replace the ' with ### so that words like '90 are not split
+  $text =~ s/ ['’]([0-9][0-9])/ ###$1/g;
+  #<<<<< end OpeNER additions
+
+  if ($LANGUAGE eq "en") {
+    # split contractions right
+    $text =~ s/([^\p{IsAlpha}])(['’])([^\p{IsAlpha}])/$1 $2 $3/g;
+    $text =~ s/([^\p{IsAlpha}\p{IsN}])(['’])([\p{IsAlpha}])/$1 $2 $3/g;
+    $text =~ s/([\p{IsAlpha}])(['’])([^\p{IsAlpha}])/$1 $2 $3/g;
+    $text =~ s/([\p{IsAlpha}])(['’])([\p{IsAlpha}])/$1 $2$3/g;
+    # special case for "1990's"
+    $text =~ s/([\p{IsN}])(['’])([s])/$1 $2$3/g;
+  } elsif ($LANGUAGE eq "fr") {
+    # split contractions left
+    $text =~ s/([^\p{IsAlpha}])(['’])([^\p{IsAlpha}])/$1 $2 $3/g;
+    $text =~ s/([^\p{IsAlpha}])(['’])([\p{IsAlpha}])/$1 $2 $3/g;
+    $text =~ s/([\p{IsAlpha}])(['’])([^\p{IsAlpha}])/$1 $2 $3/g;
+    $text =~ s/([\p{IsAlpha}])(['’])([\p{IsAlpha}])/$1$2 $3/g;
+  } elsif ($LANGUAGE eq "it") {
+    # split contractions left
+    $text =~ s/([^\p{IsAlpha}])(['’])([^\p{IsAlpha}])/$1 $2 $3/g;
+    $text =~ s/([^\p{IsAlpha}])(['’])([\p{IsAlpha}])/$1 $2 $3/g;
+    $text =~ s/([\p{IsAlpha}])(['’])([^\p{IsAlpha}])/$1 $2 $3/g;
+    $text =~ s/([\p{IsAlpha}])(['’])([\p{IsAlpha}])/$1$2 $3/g;
+    $text =~ s/([^\p{IsAlpha}\p{IsN}]po) (['’])([^\p{IsAlpha}])/$1$2 $3/g; # rule for "po'"
+  } else {
+    $text =~ s/\'/ \' /g;
+  }
+
+  #>>>>> OpeNER additions
+  # replace the ### back with ' to restore words like '90
+  $text =~ s/ ###([0-9][0-9])/ '$1/g;
+  #<<<<< end OpeNER additions
+
+  # word token method: split a trailing period off a word unless the
+  # word is a known nonbreaking prefix or an abbreviation
+  my @words = split(/\s/, $text);
+  $text = "";
+  for (my $i = 0; $i < (scalar(@words)); $i++) {
+    my $word = $words[$i];
+    if ($word =~ /^(\S+)\.$/) {
+      my $pre = $1;
+      if (($pre =~ /\./ && $pre =~ /\p{IsAlpha}/) || ($NONBREAKING_PREFIX{$pre} && $NONBREAKING_PREFIX{$pre} == 1) || ($i < scalar(@words) - 1 && ($words[$i+1] =~ /^[\p{IsLower}]/))) {
+        # no change
+      } elsif (($NONBREAKING_PREFIX{$pre} && $NONBREAKING_PREFIX{$pre} == 2) && ($i < scalar(@words) - 1 && ($words[$i+1] =~ /^[0-9]+/))) {
+        # no change
+      } else {
+        $word = $pre . " .";
+      }
+    }
+    $text .= $word . " ";
+  }
+
+  # clean up extraneous spaces
+  $text =~ s/ +/ /g;
+  $text =~ s/^ //g;
+  $text =~ s/ $//g;
+
+  # restore multi-dots
+  while ($text =~ /DOTDOTMULTI/) {
+    $text =~ s/DOTDOTMULTI/DOTMULTI./g;
+  }
+  $text =~ s/DOTMULTI/./g;
+
+  # detokenize URLs
+  $text = &detokenize_urls($text);
+
+  # ensure final line break
+  $text .= "\n" unless $text =~ /\n$/;
+  return $text;
+}
+
+sub detokenize_urls {
+  my ($text) = shift(@_);
+
+  $text =~ s/(\w{3,9}) : \/ \/ /$1:\/\//g;
+  my $URL_HEAD_PATTERN    = "\\w{3,9}:\\/\\/|www";
+  my $URL_BODY_PATTERN    = "\\w\\d\\.\\/\\-\\#;:=\\+\\?&_";
+  my $URL_SPECIAL_PATTERN = "\\/|\\?|=|&|\\+|_|\\#|:|;|\\-";
+  while ($text =~ /($URL_HEAD_PATTERN)[$URL_BODY_PATTERN]+ ($URL_SPECIAL_PATTERN)/) {
+    $text =~ s/($URL_HEAD_PATTERN)([$URL_BODY_PATTERN]+) ($URL_SPECIAL_PATTERN) {0,1}(($URL_SPECIAL_PATTERN? {0,1})+)/$1.$2.$3.&clean($4)/eg;
+  }
+
+  return $text;
+}
+
+sub clean {
+  my $text = shift(@_);
+  $text =~ s/ //g; # strip all spaces from the captured URL tail
+  return $text;
+}
+
+1;
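The word-token loop above is the core of the Moses approach: a trailing period stays attached when the token is an internal-dot abbreviation ("e.g."), a listed nonbreaking prefix (value 1), a prefix that binds only before numbers (value 2, as in "No. 10"), or when the next word starts lowercase. A minimal Ruby sketch of that decision rule, for illustration only (the PREFIXES hash contents and the helper name are hypothetical, not part of the gem):

# Sketch of the nonbreaking-prefix rule from tokenizer.pl.
# PREFIXES maps prefix => 1 (always nonbreaking) or 2 (nonbreaking
# only when followed by a number); entries here are illustrative.
PREFIXES = { 'Mr' => 1, 'No' => 2 }

def split_final_period(words)
  words.each_with_index.map do |word, i|
    next word unless word =~ /\A(\S+)\.\z/
    pre       = Regexp.last_match(1)
    next_word = words[i + 1]

    keep = (pre =~ /\./ && pre =~ /[[:alpha:]]/)       || # abbreviation like "e.g."
           PREFIXES[pre] == 1                          || # listed prefix
           (next_word && next_word =~ /\A[[:lower:]]/) || # sentence continues in lowercase
           (PREFIXES[pre] == 2 && next_word =~ /\A[0-9]/) # numeric prefix before a number
    keep ? word : "#{pre} ."
  end
end

puts split_final_period(%w[Mr. Smith lives at No. 10 today.]).join(' ')
# => Mr. Smith lives at No. 10 today .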
data/lib/opener/tokenizers/base.rb
ADDED
@@ -0,0 +1,84 @@
+require_relative 'base/version'
+
+module Opener
+  module Tokenizers
+    class Base
+      attr_reader :language
+
+      def initialize(opts = {})
+        @language ||= opts[:language] || lang
+      end
+
+      def command(opts = [])
+        "perl -I #{lib} #{kernel} #{language} #{opts.join(' ')}"
+      end
+
+      def run(opts = ARGV)
+        `#{command(opts)}`
+      end
+
+      def set_language(language)
+        @language = language
+      end
+
+      protected
+
+      def core_dir
+        File.expand_path("../../../../core", __FILE__)
+      end
+
+      def kernel
+        File.join(core_dir, 'tokenizer-cli.pl')
+      end
+
+      def lib
+        File.join(core_dir, 'lib/') # Trailing / is required
+      end
+
+      # Overrides the attr_reader: renders the language as a CLI flag.
+      def language
+        return @language.nil? ? nil : "-l #{@language}"
+      end
+
+      # Default language code; subclasses override this.
+      def lang
+        'en'
+      end
+    end
+
+    class EN < Base
+      def lang
+        'en'
+      end
+    end
+
+    class DE < Base
+      def lang
+        'de'
+      end
+    end
+
+    class NL < Base
+      def lang
+        'nl'
+      end
+    end
+
+    class ES < Base
+      def lang
+        'es'
+      end
+    end
+
+    class IT < Base
+      def lang
+        'it'
+      end
+    end
+
+    class FR < Base
+      def lang
+        'fr'
+      end
+    end
+  end
+end
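Base is a thin wrapper: command builds a perl invocation against the bundled core, run shells it out with backticks, and each subclass merely pins the default language code. A usage sketch, assuming the gem is installed, perl is on the PATH, and the text to tokenize arrives on standard input (which the backtick-spawned process inherits):

require 'opener/tokenizers/base'

# The language-specific classes differ only in their default lang.
tokenizer = Opener::Tokenizers::ES.new

puts tokenizer.command  # the perl command line that would be executed
puts tokenizer.run([])  # shells out; the Perl core reads STDIN and prints tokens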
data/opener-tokenizer-base.gemspec
ADDED
@@ -0,0 +1,25 @@
+require File.expand_path('../lib/opener/tokenizers/base/version', __FILE__)
+
+Gem::Specification.new do |gem|
+  gem.name        = 'opener-tokenizer-base'
+  gem.version     = Opener::Tokenizers::Base::VERSION
+  gem.authors     = ['development@olery.com']
+  gem.summary     = 'Tokenize English, Dutch, German, Italian and Spanish to KAF'
+  gem.description = gem.summary
+  gem.homepage    = 'http://opener-project.github.com/'
+  gem.has_rdoc    = "yard"
+  gem.required_ruby_version = ">= 1.9.2"
+
+  gem.files = Dir.glob([
+    'core/**/*',
+    'lib/**/*',
+    '*.gemspec',
+    'README.md'
+  ]).select { |file| File.file?(file) }
+
+  gem.executables = Dir.glob('bin/*').map { |file| File.basename(file) }
+
+  gem.add_development_dependency 'cucumber'
+  gem.add_development_dependency 'rspec'
+  gem.add_development_dependency 'rake'
+end
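Because gem.files and gem.executables are computed with Dir.glob at packaging time, the shipped file list depends on the working tree. A quick way to inspect the resolved specification before building, using the standard Gem::Specification.load API (run from the gem root):

# Load and inspect the gemspec without building the gem.
spec = Gem::Specification.load('opener-tokenizer-base.gemspec')

puts spec.full_name         # "opener-tokenizer-base-1.0.0"
puts spec.executables.sort  # the tokenizer-* wrappers found in bin/
puts spec.files.size        # everything matched by the glob patterns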
metadata
ADDED
@@ -0,0 +1,134 @@
+--- !ruby/object:Gem::Specification
+name: opener-tokenizer-base
+version: !ruby/object:Gem::Version
+  version: 1.0.0
+platform: ruby
+authors:
+- development@olery.com
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2014-05-20 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: cucumber
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+- !ruby/object:Gem::Dependency
+  name: rspec
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+- !ruby/object:Gem::Dependency
+  name: rake
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+description: Tokenize English, Dutch, German, Italian and Spanish to KAF
+email:
+executables:
+- tokenizer-en
+- tokenizer-it
+- tokenizer-nl
+- tokenizer-base
+- tokenizer-es
+- tokenizer-fr
+- tokenizer-de
+extensions: []
+extra_rdoc_files: []
+files:
+- README.md
+- bin/tokenizer-base
+- bin/tokenizer-de
+- bin/tokenizer-en
+- bin/tokenizer-es
+- bin/tokenizer-fr
+- bin/tokenizer-it
+- bin/tokenizer-nl
+- core/lib/Data/OptList.pm
+- core/lib/Params/Util.pm
+- core/lib/Sub/Exporter.pm
+- core/lib/Sub/Exporter/Cookbook.pod
+- core/lib/Sub/Exporter/Tutorial.pod
+- core/lib/Sub/Exporter/Util.pm
+- core/lib/Sub/Install.pm
+- core/lib/Time/Stamp.pm
+- core/load-prefixes.pl
+- core/nonbreaking_prefixes/abbreviation_list.kaf
+- core/nonbreaking_prefixes/abbreviation_list.txt
+- core/nonbreaking_prefixes/nonbreaking_prefix.ca
+- core/nonbreaking_prefixes/nonbreaking_prefix.de
+- core/nonbreaking_prefixes/nonbreaking_prefix.el
+- core/nonbreaking_prefixes/nonbreaking_prefix.en
+- core/nonbreaking_prefixes/nonbreaking_prefix.es
+- core/nonbreaking_prefixes/nonbreaking_prefix.fr
+- core/nonbreaking_prefixes/nonbreaking_prefix.is
+- core/nonbreaking_prefixes/nonbreaking_prefix.it
+- core/nonbreaking_prefixes/nonbreaking_prefix.nl
+- core/nonbreaking_prefixes/nonbreaking_prefix.pl
+- core/nonbreaking_prefixes/nonbreaking_prefix.pt
+- core/nonbreaking_prefixes/nonbreaking_prefix.ro
+- core/nonbreaking_prefixes/nonbreaking_prefix.ru
+- core/nonbreaking_prefixes/nonbreaking_prefix.sk
+- core/nonbreaking_prefixes/nonbreaking_prefix.sl
+- core/nonbreaking_prefixes/nonbreaking_prefix.sv
+- core/split-sentences.pl
+- core/text-fixer.pl
+- core/tokenizer-cli.pl
+- core/tokenizer.pl
+- lib/opener/tokenizers/base.rb
+- lib/opener/tokenizers/base/version.rb
+- opener-tokenizer-base.gemspec
+homepage: http://opener-project.github.com/
+licenses: []
+metadata: {}
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: 1.9.2
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project:
+rubygems_version: 2.2.2
+signing_key:
+specification_version: 4
+summary: Tokenize English, Dutch, German, Italian and Spanish to KAF
+test_files: []
+has_rdoc: yard
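This metadata document is the YAML serialization that RubyGems generates from the gemspec at build time; inside the packaged .gem archive it is stored gzipped as metadata.gz. A sketch of round-tripping it back into a Gem::Specification, assuming the file has already been extracted from the archive:

require 'rubygems'

# Gem::Specification.from_yaml parses exactly this serialized form.
spec = Gem::Specification.from_yaml(File.read('metadata'))
puts spec.name     # "opener-tokenizer-base"
puts spec.summary  # "Tokenize English, Dutch, German, Italian and Spanish to KAF"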