freeling-client 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: db5bc7707caee2de4e0609c5fe595ff9213c3f43
4
- data.tar.gz: 090a06c682c062753199ec37b21eaeebc868315a
3
+ metadata.gz: 22175d4562553eaba3d4d095df0cf9277813cfb7
4
+ data.tar.gz: 3e62780a11bab00955217a164a8aa1dfa342b952
5
5
  SHA512:
6
- metadata.gz: 5fb75e56e9efaf8842271b502bfb6df06829cb3248959f3a712bf28d0b2f4724ff796218a24334b288b5851b2849e70f91011c3c3751052b5d3b589c4bcd6352
7
- data.tar.gz: 841275c8fd541b0600229195f2fc4b82474b17c7d52a08e94a1003d6dfb860ea08f635c851c1b86612a3d6e6fd1cda288449e612301e55a73da04e11bca20486
6
+ metadata.gz: b4042283e88acaa32d476ce2575bef9f7611a798acb0b00d8c061bfec78c2aed66b8fa0b9295053d484b5a5fd8e25323f70a137a5a5a3205b3e54e349a1c2a69
7
+ data.tar.gz: 249cf116647b29dba7a8b5d6286cf6a53edf4d1d6cd58ae016ebc5a5afe696e34627c13c6d7831e1c6e18bec73ad5e3f1379a91e6688a281e71626cb3a5c2c01
data/.gitignore ADDED
@@ -0,0 +1,34 @@
1
+ *.gem
2
+ *.rbc
3
+ /.config
4
+ /coverage/
5
+ /InstalledFiles
6
+ /pkg/
7
+ /spec/reports/
8
+ /test/tmp/
9
+ /test/version_tmp/
10
+ /tmp/
11
+
12
+ ## Specific to RubyMotion:
13
+ .dat*
14
+ .repl_history
15
+ build/
16
+
17
+ ## Documentation cache and generated files:
18
+ /.yardoc/
19
+ /_yardoc/
20
+ /doc/
21
+ /rdoc/
22
+
23
+ ## Environment normalisation:
24
+ /.bundle/
25
+ /lib/bundler/man/
26
+
27
+ # for a library or gem, you might want to ignore these files since the code is
28
+ # intended to run in multiple environments; otherwise, check them in:
29
+ # Gemfile.lock
30
+ # .ruby-version
31
+ # .ruby-gemset
32
+
33
+ # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
34
+ .rvmrc
data/Gemfile.lock CHANGED
@@ -2,7 +2,7 @@ PATH
2
2
  remote: .
3
3
  specs:
4
4
  freeling-client (0.0.1)
5
- hashie
5
+ hashie (~> 3)
6
6
 
7
7
  GEM
8
8
  remote: https://rubygems.org/
@@ -17,5 +17,5 @@ PLATFORMS
17
17
  DEPENDENCIES
18
18
  bundler (~> 1.7)
19
19
  freeling-client!
20
- minitest
20
+ minitest (~> 5)
21
21
  rake (~> 10.3)
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2014 Marcos Vanetta
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
22
+
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Freeling::Client
2
2
 
3
- Simple client wrapper for Freeling analyzer tool. If you need to install freeling on Ubuntu 14.04 just follow [this](https://gist.github.com/malev/d6a8b51c2ae0a762ab1d) guide.
3
+ Simple client wrapper for Freeling analyzer tool. It only works with [these](http://nlp.lsi.upc.edu/freeling/doc/userman/html/node91.html) options. If you need to install freeling on Ubuntu 14.04 just follow [this](https://gist.github.com/malev/d6a8b51c2ae0a762ab1d) guide.
4
4
 
5
5
  ## Example of usage:
6
6
 
@@ -43,16 +43,20 @@ freeling_client.call(text)
43
43
 
44
44
  Performing morphological analysis:
45
45
 
46
- FREELINGSHARE=/usr/local/share/freeling/ analyzer -f config/freeling/analyzer.cfg --server --port 50005 --inpf plain --outf morfo
46
+ FREELINGSHARE=/usr/local/share/freeling/ analyzer \
47
+ -f config/freeling/analyzer.cfg --server --port 50005 --inpf plain --outf morfo
47
48
 
48
49
  Performing morphological analysis with PoS tagging:
49
50
 
50
- FREELINGSHARE=/usr/local/share/freeling/ analyzer -f config/freeling/analyzer.cfg --server --port 50005 --inpf plain --outf tagged
51
+ FREELINGSHARE=/usr/local/share/freeling/ analyzer \
52
+ -f config/freeling/analyzer.cfg --server --port 50005 --inpf plain --outf tagged
51
53
 
52
54
  Asking for the senses of the tagged words:
53
55
 
54
- FREELINGSHARE=/usr/local/share/freeling/ analyzer -f config/freeling/analyzer.cfg --server --port 50005 --inpf plain --outf sense --sense all
56
+ FREELINGSHARE=/usr/local/share/freeling/ analyzer \
57
+ -f config/freeling/analyzer.cfg --server --port 50005 --inpf plain --outf sense --sense all
55
58
 
56
59
  With `nec` analysis:
57
60
 
58
- FREELINGSHARE=/usr/local/share/freeling/ analyzer -f config/freeling/analyzer.cfg --server --port 50005 --inpf plain --outf tagged --nec --noflush
61
+ FREELINGSHARE=/usr/local/share/freeling/ analyzer \
62
+ -f config/freeling/analyzer.cfg --server --port 50005 --inpf plain --outf tagged --nec --noflush
@@ -1,6 +1,10 @@
1
+ # encoding: utf-8
2
+
3
+ require "enumerator"
1
4
  require "open3"
2
5
  require "tempfile"
3
6
  require "timeout"
7
+
4
8
  require "hashie/mash"
5
9
  require "freeling_client/base"
6
10
 
@@ -8,6 +12,8 @@ require "freeling_client/base"
8
12
  module FreelingClient
9
13
  class Analyzer < Base
10
14
 
15
+ Token = Class.new(Hashie::Mash)
16
+
11
17
  def initialize(opt={})
12
18
  @config = opt.fetch(:config, 'config/freeling/analyzer.cfg')
13
19
  @port = opt[:port]
@@ -43,6 +49,58 @@ module FreelingClient
43
49
  output
44
50
  end
45
51
 
52
+ def tokens(cmd, text)
53
+ valide_command!(cmd)
54
+ Enumerator.new do |yielder|
55
+ call(cmd, text).each do |freeling_line|
56
+ yielder << parse_token_line(freeling_line) unless freeling_line.empty?
57
+ end
58
+ end
59
+ end
60
+
61
+ def ptokens(cmd, text)
62
+ Enumerator.new do |yielder|
63
+ pos = 0
64
+ tokens(cmd, text).each do |token|
65
+ ne_text = token['form'].dup
66
+
67
+ ne_regexp = build_regexp(ne_text)
68
+ token_pos = text.index(ne_regexp, pos)
69
+
70
+ if token_pos && token_pos < (pos + 5)
71
+ token.pos = token_pos
72
+ yielder << token
73
+
74
+ pos = token_pos + ne_text.length
75
+ else
76
+ pos = pos + ne_text.length
77
+ end
78
+ end
79
+ end
80
+ end
81
+
82
+ def parse_token_line(str)
83
+ form, lemma, tag, prob = str.split(' ')[0..3]
84
+ Token.new({
85
+ :form => form,
86
+ :lemma => lemma,
87
+ :tag => tag,
88
+ :prob => prob.nil? ? nil : prob.to_f,
89
+ }.reject { |k, v| v.nil? })
90
+ end
91
+
92
+ def build_regexp(ne_text)
93
+ begin
94
+ if ne_text =~ /\_/
95
+ /#{ne_text.split('_').join('\W+')}/i
96
+ else
97
+ /#{ne_text}/i
98
+ end
99
+ rescue RegexpError => e
100
+ /./
101
+ end
102
+ end
103
+
46
104
  def command(cmd, file_path)
47
105
  self.send("command_#{cmd}", file_path)
48
106
  end
@@ -1,3 +1,3 @@
1
1
  module FreelingClient
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
@@ -1,23 +1,34 @@
1
+ # encoding: utf-8
2
+
1
3
  require 'test_helper'
2
4
 
5
+ text = "El gato come pescado. Pero a Don Jaime no le gustan los gatos."
3
6
 
4
7
  describe FreelingClient do
5
8
  it 'detects language' do
6
- text = "El gato come pescado. Pero a Don Jaime no le gustan los gatos."
7
9
  lang_detector = FreelingClient::LanguageDetector.new
8
10
  lang_detector.detect(text).must_equal :es
9
11
  end
10
12
 
11
13
  it 'Uses freeling as a client (mode server)' do
12
- text = "El gato come pescado. Pero a Don Jaime no le gustan los gatos."
13
14
  freeling_client = FreelingClient::Client.new
14
15
  freeling_client.call(text)[0].must_equal "El el DA0MS0 1"
15
16
  end
16
17
 
17
18
  it 'Uses freeling to get a morphological analysis' do
18
- text = "El gato come pescado. Pero a Don Jaime no le gustan los gatos."
19
19
  analyzer = FreelingClient::Analyzer.new
20
-
21
20
  analyzer.call(:morfo, text)[0].must_equal "El el DA0MS0 1"
22
21
  end
22
+
23
+ it "returns parsed tokens" do
24
+ analyzer = FreelingClient::Analyzer.new
25
+ analyzer.tokens(:morfo, text).first.lemma.must_equal "el"
26
+ end
27
+
28
+ it "returns positionated tokens" do
29
+ analyzer = FreelingClient::Analyzer.new
30
+ tokens = analyzer.ptokens(:morfo, text).to_a
31
+ tokens[0].pos.must_equal 0
32
+ tokens[1].pos.must_equal 3
33
+ end
23
34
  end
data/test/test_helper.rb CHANGED
@@ -1,5 +1,4 @@
1
1
  require 'minitest/autorun'
2
2
  require 'minitest/unit'
3
- require 'mocha/mini_test'
4
3
 
5
4
  require 'freeling_client'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: freeling-client
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Marcos Vanetta
@@ -72,8 +72,10 @@ executables: []
72
72
  extensions: []
73
73
  extra_rdoc_files: []
74
74
  files:
75
+ - ".gitignore"
75
76
  - Gemfile
76
77
  - Gemfile.lock
78
+ - LICENSE
77
79
  - README.md
78
80
  - Rakefile
79
81
  - config/freeling/analyzer.cfg