ve 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.gitignore +2 -0
- data/.travis.yml +3 -6
- data/Gemfile +8 -6
- data/Gemfile.lock +29 -19
- data/LICENSE.txt +21 -0
- data/Readme.md +42 -5
- data/java/.gitignore +4 -0
- data/java/build.gradle +38 -0
- data/java/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/java/gradle/wrapper/gradle-wrapper.properties +5 -0
- data/java/gradlew +185 -0
- data/java/gradlew.bat +104 -0
- data/java/pom.xml +56 -0
- data/java/readme.md +103 -0
- data/java/settings.gradle +1 -0
- data/java/src/main/java/ve/Grammar.java +10 -0
- data/java/src/main/java/ve/Parse.java +336 -0
- data/java/src/main/java/ve/Pos.java +27 -0
- data/java/src/main/java/ve/Word.java +104 -0
- data/java/src/test/java/ve/VeTest.java +41 -0
- data/lib/part_of_speech.rb +1 -1
- data/lib/providers/freeling_en.rb +29 -28
- data/lib/providers/japanese_transliterators.rb +14 -14
- data/lib/providers/mecab_ipadic.rb +10 -10
- data/lib/ve.rb +21 -15
- data/lib/word.rb +19 -12
- data/sinatra/server.rb +35 -2
- data/tests/japanese_transliterators_test.rb +8 -5
- data/tests/mecab_ipadic_parse_test.rb +12 -0
- data/tests/test_helper.rb +0 -1
- data/tests/ve_test.rb +0 -1
- data/ve.gemspec +9 -7
- metadata +24 -9
@@ -41,10 +41,6 @@ class Ve
|
|
41
41
|
end
|
42
42
|
|
43
43
|
Ve::Parse::MecabIpadic.new(text, output)
|
44
|
-
rescue => e
|
45
|
-
# TODO: No good to catch all errors like this
|
46
|
-
# I need a backtrace when something unexpected fails
|
47
|
-
Ve::Parse::MecabIpadic.new(text, [])
|
48
44
|
end
|
49
45
|
|
50
46
|
private
|
@@ -255,12 +251,16 @@ class Ve
|
|
255
251
|
also_attach_to_lemma = true
|
256
252
|
end
|
257
253
|
when SETSUBI
|
258
|
-
if token[:pos3] ==
|
259
|
-
attach_to_previous = true
|
260
|
-
update_pos = true
|
261
|
-
pos = Ve::PartOfSpeech::Noun
|
262
|
-
else
|
254
|
+
if token[:pos3] == JINMEI
|
263
255
|
pos = Ve::PartOfSpeech::Suffix
|
256
|
+
else
|
257
|
+
if token[:pos3] == TOKUSHU && token[:lemma] == SA
|
258
|
+
update_pos = true
|
259
|
+
pos = Ve::PartOfSpeech::Noun
|
260
|
+
else
|
261
|
+
also_attach_to_lemma = true
|
262
|
+
end
|
263
|
+
attach_to_previous = true
|
264
264
|
end
|
265
265
|
when SETSUZOKUSHITEKI
|
266
266
|
pos = Ve::PartOfSpeech::Conjunction
|
@@ -341,7 +341,7 @@ class Ve
|
|
341
341
|
|
342
342
|
words << word
|
343
343
|
end
|
344
|
-
|
344
|
+
|
345
345
|
previous = token
|
346
346
|
end
|
347
347
|
rescue StopIteration
|
data/lib/ve.rb
CHANGED
@@ -8,14 +8,13 @@ require 'languages/japanese'
|
|
8
8
|
require 'pp'
|
9
9
|
|
10
10
|
class Ve
|
11
|
-
|
12
11
|
class Manager
|
13
12
|
@@config_for = {}
|
14
|
-
|
13
|
+
|
15
14
|
def self.set_default_config_for(klass, config = {})
|
16
15
|
@@config_for[klass] = config
|
17
16
|
end
|
18
|
-
|
17
|
+
|
19
18
|
def self.provider_for(language, function)
|
20
19
|
provider = @@provider_for[language.to_sym][function.to_sym]
|
21
20
|
if provider.is_a?(Class)
|
@@ -26,6 +25,14 @@ class Ve
|
|
26
25
|
provider
|
27
26
|
end
|
28
27
|
|
28
|
+
def self.languages
|
29
|
+
@@provider_for.keys
|
30
|
+
end
|
31
|
+
|
32
|
+
def self.functions_for_language(language)
|
33
|
+
@@provider_for[language.to_sym].keys
|
34
|
+
end
|
35
|
+
|
29
36
|
# TODO: Make a difference between what features are available locally
|
30
37
|
# and what requires contacting external Ves
|
31
38
|
def self.register(klass, language)
|
@@ -41,9 +48,9 @@ class Ve
|
|
41
48
|
end
|
42
49
|
end
|
43
50
|
end
|
44
|
-
|
51
|
+
|
45
52
|
# TODO: Put into separate files
|
46
|
-
class LocalInterface
|
53
|
+
class LocalInterface
|
47
54
|
def initialize(language, config = {})
|
48
55
|
@language = language
|
49
56
|
end
|
@@ -54,12 +61,12 @@ class Ve
|
|
54
61
|
parse.send(function.to_sym)
|
55
62
|
end
|
56
63
|
end
|
57
|
-
|
64
|
+
|
58
65
|
class HTTPInterface
|
59
66
|
require 'net/http'
|
60
67
|
require 'uri'
|
61
68
|
require 'json'
|
62
|
-
|
69
|
+
|
63
70
|
def initialize(language, config = {})
|
64
71
|
@language = language
|
65
72
|
@base_url = config[:url]
|
@@ -71,7 +78,7 @@ class Ve
|
|
71
78
|
response = Net::HTTP.post_form(uri, {:text => args[0]})
|
72
79
|
data = JSON.parse(response.body)
|
73
80
|
result = []
|
74
|
-
|
81
|
+
|
75
82
|
data.each do |obj|
|
76
83
|
# TODO: Support transliterations
|
77
84
|
case obj['_class']
|
@@ -79,31 +86,31 @@ class Ve
|
|
79
86
|
result << Ve::Word.new(obj['word'], obj['lemma'], obj['part_of_speech'], obj['tokens'], obj['extra'], obj['info'])
|
80
87
|
end
|
81
88
|
end
|
82
|
-
|
89
|
+
|
83
90
|
result
|
84
91
|
end
|
85
92
|
end
|
86
|
-
|
93
|
+
|
87
94
|
@@interface = Ve::LocalInterface
|
88
95
|
@@interface_for = {}
|
89
96
|
@@config = {}
|
90
|
-
|
97
|
+
|
91
98
|
# End-users only interact with this class, so it must provide a sexy interface
|
92
99
|
# to all functionality in the providers and parse objects
|
93
|
-
|
100
|
+
|
94
101
|
# Basic, non-sexy, local interface only
|
95
102
|
def self.get(text, language, function, *args)
|
96
103
|
provider = Ve::Manager.provider_for(language, function, *args)
|
97
104
|
parse = provider.parse(text, args)
|
98
105
|
parse.send(function.to_sym)
|
99
106
|
end
|
100
|
-
|
107
|
+
|
101
108
|
# Early sexy verision
|
102
109
|
def self.in(language)
|
103
110
|
unless @@interface_for[language]
|
104
111
|
@@interface_for[language] = @@interface.new(language, @@config)
|
105
112
|
end
|
106
|
-
|
113
|
+
|
107
114
|
@@interface_for[language]
|
108
115
|
end
|
109
116
|
|
@@ -111,7 +118,6 @@ class Ve
|
|
111
118
|
@@interface = interface
|
112
119
|
@@config = config
|
113
120
|
end
|
114
|
-
|
115
121
|
end
|
116
122
|
|
117
123
|
# TODO: Autoload this shit
|
data/lib/word.rb
CHANGED
@@ -1,20 +1,20 @@
|
|
1
1
|
class Ve
|
2
2
|
class Word
|
3
|
-
|
3
|
+
|
4
4
|
attr_accessor :word, :lemma, :part_of_speech, :tokens, :extra, :info
|
5
|
-
|
5
|
+
|
6
6
|
# TODO: More elegance
|
7
7
|
def initialize(word, lemma, part_of_speech, tokens, extra = {}, info = {})
|
8
8
|
@word = word.dup
|
9
9
|
@lemma = lemma.dup
|
10
10
|
@part_of_speech = part_of_speech
|
11
11
|
@tokens = tokens
|
12
|
-
|
12
|
+
|
13
13
|
# TODO: I don't like this, it's too unstructured
|
14
14
|
@extra = extra
|
15
15
|
@info = info
|
16
16
|
end
|
17
|
-
|
17
|
+
|
18
18
|
# TODO: the main part of a word, for example 重要 in 重要な
|
19
19
|
def main_part
|
20
20
|
end
|
@@ -22,22 +22,29 @@ class Ve
|
|
22
22
|
def base_form
|
23
23
|
@lemma
|
24
24
|
end
|
25
|
-
|
25
|
+
|
26
26
|
def inflected?
|
27
27
|
@word != @lemma
|
28
28
|
end
|
29
29
|
|
30
|
-
def as_json
|
31
|
-
{
|
32
|
-
:_class => 'Word',
|
30
|
+
def as_json(verbose = true)
|
31
|
+
hash = {
|
33
32
|
:word => @word,
|
34
33
|
:lemma => @lemma,
|
35
34
|
:part_of_speech => @part_of_speech.name,
|
36
|
-
:
|
37
|
-
:extra => @extra,
|
38
|
-
:info => @info
|
35
|
+
:extra => @extra
|
39
36
|
}
|
37
|
+
|
38
|
+
if verbose
|
39
|
+
hash.merge!({
|
40
|
+
:_class => 'Word',
|
41
|
+
:tokens => @tokens,
|
42
|
+
:info => @info
|
43
|
+
})
|
44
|
+
end
|
45
|
+
|
46
|
+
hash
|
40
47
|
end
|
41
|
-
|
48
|
+
|
42
49
|
end
|
43
50
|
end
|
data/sinatra/server.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# Encoding: UTF-8
|
2
|
+
|
1
3
|
require 'rubygems'
|
2
4
|
require 'bundler/setup'
|
3
5
|
require 'sinatra'
|
@@ -12,6 +14,29 @@ use Rack::Cors do
|
|
12
14
|
end
|
13
15
|
end
|
14
16
|
|
17
|
+
get '/' do
|
18
|
+
content_type 'application/json', :charset => 'utf-8'
|
19
|
+
|
20
|
+
howto = {
|
21
|
+
"meta" => {"status" => 200},
|
22
|
+
"usage" => "/:language/:function?text=X",
|
23
|
+
"languages" => {}
|
24
|
+
}
|
25
|
+
|
26
|
+
Ve::Manager.languages.each do |lang|
|
27
|
+
lang_functions = {"#{lang}" => [], }
|
28
|
+
|
29
|
+
Ve::Manager.functions_for_language(lang).each do |func|
|
30
|
+
functional = Ve::Manager.provider_for(lang, func).works?
|
31
|
+
lang_functions[lang.to_s] << {"name" => func.to_s, "functional" => functional}
|
32
|
+
end
|
33
|
+
|
34
|
+
howto["languages"].merge!(lang_functions)
|
35
|
+
end
|
36
|
+
|
37
|
+
howto.to_json
|
38
|
+
end
|
39
|
+
|
15
40
|
get '/:language/:function' do
|
16
41
|
run
|
17
42
|
end
|
@@ -26,11 +51,19 @@ def run
|
|
26
51
|
# Ve.source = Ve::Local # Default
|
27
52
|
# Ve.source = Ve::Remote.new(:url => 'http://ve.kimtaro.com/', :access_token => 'XYZ')
|
28
53
|
# result = Ve.get(params[:text], params[:language], params[:function].to_sym)
|
29
|
-
|
54
|
+
|
55
|
+
if !Ve::Manager.functions_for_language(params[:language]).include?(params[:function].to_sym)
|
56
|
+
status 404
|
57
|
+
content_type 'application/json', :charset => 'utf-8'
|
58
|
+
return '{"meta": {"status": 404}}'
|
59
|
+
end
|
60
|
+
|
61
|
+
result = Ve.in(params[:language]).send(params[:function], params[:text])
|
62
|
+
verbose = params[:verbose] == 'true'
|
30
63
|
|
31
64
|
case params[:function].to_sym
|
32
65
|
when :words
|
33
|
-
json = JSON.generate(result.collect(
|
66
|
+
json = JSON.generate(result.collect { |w| w.as_json(verbose) })
|
34
67
|
else
|
35
68
|
json = result
|
36
69
|
end
|
@@ -8,11 +8,11 @@ class JapaneseTransliteratorsTest < MiniTest::Unit::TestCase
|
|
8
8
|
HIRAGANA = "ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをんゔゕゖ"
|
9
9
|
HALFWIDTH = "!\"\#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ "
|
10
10
|
FULLWIDTH = "！＂＃＄％＆＇（）＊＋，－．／０１２３４５６７８９：；＜＝＞？＠ＡＢＣＤＥＦＧＨＩＪＫＬＭＮＯＰＱＲＳＴＵＶＷＸＹＺ［＼］＾＿｀ａｂｃｄｅｆｇｈｉｊｋｌｍｎｏｐｑｒｓｔｕｖｗｘｙｚ｛｜｝～　"
|
11
|
-
|
11
|
+
|
12
12
|
def setup
|
13
13
|
@trans = Ve::Provider::JapaneseTransliterators.new
|
14
14
|
end
|
15
|
-
|
15
|
+
|
16
16
|
def test_should_be_able_to_start
|
17
17
|
assert @trans.works?
|
18
18
|
end
|
@@ -25,7 +25,7 @@ class JapaneseTransliteratorsTest < MiniTest::Unit::TestCase
|
|
25
25
|
assert_equal 'shinbun', @trans.parse('しんぶん').transliterate_from_hira_to_latn
|
26
26
|
assert_equal 'appa', @trans.parse('あっぱ').transliterate_from_hira_to_latn
|
27
27
|
end
|
28
|
-
|
28
|
+
|
29
29
|
def test_transliterate_from_latn_to_hrkt
|
30
30
|
assert_equal('かなです', @trans.parse('kanadesu').transliterate_from_latn_to_hrkt)
|
31
31
|
assert_equal('こそあど', @trans.parse('kosoado').transliterate_from_latn_to_hrkt)
|
@@ -53,6 +53,9 @@ class JapaneseTransliteratorsTest < MiniTest::Unit::TestCase
|
|
53
53
|
|
54
54
|
# Non-Japanese
|
55
55
|
assert_equal('てぃs いs そめ えんgりsh', @trans.parse('this is some english').transliterate_from_latn_to_hrkt)
|
56
|
+
|
57
|
+
assert_equal('ばっは', @trans.parse('bahha').transliterate_from_latn_to_hrkt)
|
58
|
+
assert_equal('すたっふ', @trans.parse('sutaffu').transliterate_from_latn_to_hrkt)
|
56
59
|
end
|
57
60
|
|
58
61
|
def test_transliterate_from_hira_to_kana
|
@@ -66,11 +69,11 @@ class JapaneseTransliteratorsTest < MiniTest::Unit::TestCase
|
|
66
69
|
def test_transliterate_from_hrkt_to_latn
|
67
70
|
assert_equal 'hiraganakatakana', @trans.parse('ひらがなカタカナ').transliterate_from_hrkt_to_latn
|
68
71
|
end
|
69
|
-
|
72
|
+
|
70
73
|
def test_transliterate_from_fullwidth_to_halfwidth
|
71
74
|
assert_equal HALFWIDTH, @trans.parse(FULLWIDTH).transliterate_from_fullwidth_to_halfwidth
|
72
75
|
end
|
73
|
-
|
76
|
+
|
74
77
|
def test_transliterate_from_halfwidth_to_fullwidth
|
75
78
|
assert_equal FULLWIDTH, @trans.parse(HALFWIDTH).transliterate_from_halfwidth_to_fullwidth
|
76
79
|
end
|
@@ -756,6 +756,18 @@ EOR
|
|
756
756
|
は 助詞,係助詞,*,*,*,*,は,ハ,ワ,,
|
757
757
|
ない 助動詞,*,*,*,特殊・ナイ,基本形,ない,ナイ,ナイ,,
|
758
758
|
EOS
|
759
|
+
EOR
|
760
|
+
|
761
|
+
# いじめっ子
|
762
|
+
assert_parses_into_words(Ve::Parse::MecabIpadic, {:words => ["いじめっ子"],
|
763
|
+
:lemmas => ["いじめっ子"],
|
764
|
+
:pos => [Ve::PartOfSpeech::Noun],
|
765
|
+
:extra => [{:reading=>"イジメッコ", :transcription=>"イジメッコ", :grammar=>nil}],
|
766
|
+
:tokens => [0..1]},
|
767
|
+
'いじめっ子', <<-EOR.split("\n"))
|
768
|
+
いじめ 名詞,一般,*,*,*,*,いじめ,イジメ,イジメ
|
769
|
+
っ子 名詞,接尾,一般,*,*,*,っ子,ッコ,ッコ
|
770
|
+
EOS
|
759
771
|
EOR
|
760
772
|
|
761
773
|
# TODO: xした should parse as adjective?
|
data/tests/test_helper.rb
CHANGED
data/tests/ve_test.rb
CHANGED
data/ve.gemspec
CHANGED
@@ -1,20 +1,22 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
1
2
|
Gem::Specification.new do |s|
|
2
3
|
s.name = 've'
|
3
|
-
s.version = '0.0.3'
|
4
|
+
s.version = '0.0.4'
|
4
5
|
s.platform = Gem::Platform::RUBY
|
5
|
-
s.authors = [
|
6
|
-
s.email = [
|
7
|
-
s.
|
6
|
+
s.authors = ['Kim Ahlström']
|
7
|
+
s.email = ['kim.ahlstrom@gmail.com']
|
8
|
+
s.license = 'MIT'
|
9
|
+
s.homepage = 'http://github.com/kimtaro/ve'
|
8
10
|
s.summary = 'Ve is a linguistic framework for programmers'
|
9
11
|
s.description = 'Ve is a linguistic framework for programmers.'
|
10
|
-
|
12
|
+
|
11
13
|
# The list of files to be contained in the gem
|
12
14
|
s.files = `git ls-files`.split("\n")
|
13
15
|
# s.executables = `git ls-files`.split("\n").map{|f| f =~ /^bin\/(.*)/ ? $1 : nil}.compact
|
14
16
|
# s.extensions = `git ls-files ext/extconf.rb`.split("\n")
|
15
|
-
|
17
|
+
|
16
18
|
s.require_paths = ['lib']
|
17
19
|
|
18
20
|
# For C extensions
|
19
|
-
# s.extensions =
|
21
|
+
# s.extensions = 'ext/extconf.rb'
|
20
22
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ve
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kim Ahlström
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-08-30 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Ve is a linguistic framework for programmers.
|
14
14
|
email:
|
@@ -17,12 +17,27 @@ executables: []
|
|
17
17
|
extensions: []
|
18
18
|
extra_rdoc_files: []
|
19
19
|
files:
|
20
|
-
- .gitignore
|
21
|
-
- .travis.yml
|
20
|
+
- ".gitignore"
|
21
|
+
- ".travis.yml"
|
22
22
|
- Gemfile
|
23
23
|
- Gemfile.lock
|
24
|
+
- LICENSE.txt
|
24
25
|
- Rakefile
|
25
26
|
- Readme.md
|
27
|
+
- java/.gitignore
|
28
|
+
- java/build.gradle
|
29
|
+
- java/gradle/wrapper/gradle-wrapper.jar
|
30
|
+
- java/gradle/wrapper/gradle-wrapper.properties
|
31
|
+
- java/gradlew
|
32
|
+
- java/gradlew.bat
|
33
|
+
- java/pom.xml
|
34
|
+
- java/readme.md
|
35
|
+
- java/settings.gradle
|
36
|
+
- java/src/main/java/ve/Grammar.java
|
37
|
+
- java/src/main/java/ve/Parse.java
|
38
|
+
- java/src/main/java/ve/Pos.java
|
39
|
+
- java/src/main/java/ve/Word.java
|
40
|
+
- java/src/test/java/ve/VeTest.java
|
26
41
|
- js/test.html
|
27
42
|
- js/ve.js
|
28
43
|
- lib/language.rb
|
@@ -47,7 +62,8 @@ files:
|
|
47
62
|
- tests/ve_test.rb
|
48
63
|
- ve.gemspec
|
49
64
|
homepage: http://github.com/kimtaro/ve
|
50
|
-
licenses:
|
65
|
+
licenses:
|
66
|
+
- MIT
|
51
67
|
metadata: {}
|
52
68
|
post_install_message:
|
53
69
|
rdoc_options: []
|
@@ -55,17 +71,16 @@ require_paths:
|
|
55
71
|
- lib
|
56
72
|
required_ruby_version: !ruby/object:Gem::Requirement
|
57
73
|
requirements:
|
58
|
-
- -
|
74
|
+
- - ">="
|
59
75
|
- !ruby/object:Gem::Version
|
60
76
|
version: '0'
|
61
77
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
62
78
|
requirements:
|
63
|
-
- -
|
79
|
+
- - ">="
|
64
80
|
- !ruby/object:Gem::Version
|
65
81
|
version: '0'
|
66
82
|
requirements: []
|
67
|
-
|
68
|
-
rubygems_version: 2.0.3
|
83
|
+
rubygems_version: 3.0.3
|
69
84
|
signing_key:
|
70
85
|
specification_version: 4
|
71
86
|
summary: Ve is a linguistic framework for programmers
|