freeling-client 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: db5bc7707caee2de4e0609c5fe595ff9213c3f43
4
+ data.tar.gz: 090a06c682c062753199ec37b21eaeebc868315a
5
+ SHA512:
6
+ metadata.gz: 5fb75e56e9efaf8842271b502bfb6df06829cb3248959f3a712bf28d0b2f4724ff796218a24334b288b5851b2849e70f91011c3c3751052b5d3b589c4bcd6352
7
+ data.tar.gz: 841275c8fd541b0600229195f2fc4b82474b17c7d52a08e94a1003d6dfb860ea08f635c851c1b86612a3d6e6fd1cda288449e612301e55a73da04e11bca20486
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,21 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ freeling-client (0.0.1)
5
+ hashie
6
+
7
+ GEM
8
+ remote: https://rubygems.org/
9
+ specs:
10
+ hashie (3.3.2)
11
+ minitest (5.4.3)
12
+ rake (10.3.2)
13
+
14
+ PLATFORMS
15
+ ruby
16
+
17
+ DEPENDENCIES
18
+ bundler (~> 1.7)
19
+ freeling-client!
20
+ minitest
21
+ rake (~> 10.3)
data/README.md ADDED
@@ -0,0 +1,58 @@
1
+ # Freeling::Client
2
+
3
+ Simple client wrapper for the Freeling analyzer tool. If you need to install Freeling on Ubuntu 14.04, just follow [this](https://gist.github.com/malev/d6a8b51c2ae0a762ab1d) guide.
4
+
5
+ ## Example of usage:
6
+
7
+ ```ruby
8
+ require 'freeling_client'
9
+
10
+ text = <<-EOF
11
+ Malcolm X was effectively orphaned early in life. His father was killed when he was six and his mother was placed in a mental hospital when he was thirteen, after which he lived in a series of foster homes.
12
+ EOF
13
+
14
+ options = {
15
+ timeout: 200,
16
+ config: 'config.cfg',
17
+ ident: 'ident.dat'
18
+ }
19
+
20
+ # Using the language detection tool
21
+ lang_detector = FreelingClient::LanguageDetector.new options
22
+ lang_detector.detect(text) # => :en
23
+
24
+ # Morphological analysis, PoS tagging, sense annotation and named-entity classification (nec)
25
+ analyzer = FreelingClient::Analyzer.new options
26
+ analyzer.call(:morfo, text)
27
+ analyzer.call(:tagged, text)
28
+ analyzer.call(:tagged_sense, text)
29
+ analyzer.call(:tagged_nec, text)
30
+
31
+ # Using as a client
32
+ # You will need to set up the server first. See below.
33
+ options = {
34
+ server: 'localhost',
35
+ port: 50005
36
+ }
37
+
38
+ freeling_client = FreelingClient::Client.new options
39
+ freeling_client.call(text)
40
+ ```
41
+
42
+ ## Running Freeling as a server
43
+
44
+ Performing morphological analysis:
45
+
46
+ FREELINGSHARE=/usr/local/share/freeling/ analyzer -f config/freeling/analyzer.cfg --server --port 50005 --inpf plain --outf morfo
47
+
48
+ Performing morphological analysis with PoS tagging:
49
+
50
+ FREELINGSHARE=/usr/local/share/freeling/ analyzer -f config/freeling/analyzer.cfg --server --port 50005 --inpf plain --outf tagged
51
+
52
+ Asking for the senses of the tagged words:
53
+
54
+ FREELINGSHARE=/usr/local/share/freeling/ analyzer -f config/freeling/analyzer.cfg --server --port 50005 --inpf plain --outf sense --sense all
55
+
56
+ With `nec` analysis:
57
+
58
+ FREELINGSHARE=/usr/local/share/freeling/ analyzer -f config/freeling/analyzer.cfg --server --port 50005 --inpf plain --outf tagged --nec --noflush
data/Rakefile ADDED
@@ -0,0 +1,10 @@
1
+ require 'rake/testtask'
2
+
3
+ Rake::TestTask.new do |t|
4
+ t.libs << 'test'
5
+ t.test_files = FileList["test/**/*_test.rb"]
6
+ t.verbose = true
7
+ end
8
+
9
+ desc "Run tests"
10
+ task :default => :test
@@ -0,0 +1,89 @@
1
+ ##
2
+ #### default configuration file for Spanish analyzer
3
+ ##
4
+
5
+ TraceLevel=3
6
+ TraceModule=0x0000
7
+
8
+ ## Options to control the applied modules. The input may be partially
9
+ ## processed, or a full analysis may not be wanted. The specific
10
+ ## formats are a choice of the main program using the library, as well
11
+ ## as the responsibility of calling only the required modules.
12
+ ## Valid input formats are: plain, token, splitted, morfo, tagged, sense.
13
+ ## Valid output formats are: plain, token, splitted, morfo, tagged,
14
+ ## shallow, parsed, dep.
15
+ InputFormat=plain
16
+ OutputFormat=tagged
17
+
18
+ # consider each newline as a sentence end
19
+ AlwaysFlush=no
20
+
21
+ #### Tokenizer options
22
+ TokenizerFile=$FREELINGSHARE/es/tokenizer.dat
23
+
24
+ #### Splitter options
25
+ SplitterFile=$FREELINGSHARE/es/splitter.dat
26
+
27
+ #### Morfo options
28
+ AffixAnalysis=yes
29
+ MultiwordsDetection=yes
30
+ NumbersDetection=yes
31
+ PunctuationDetection=yes
32
+ DatesDetection=yes
33
+ QuantitiesDetection=yes
34
+ DictionarySearch=yes
35
+ ProbabilityAssignment=yes
36
+ OrthographicCorrection=no
37
+ DecimalPoint=,
38
+ ThousandPoint=.
39
+ LocutionsFile=$FREELINGSHARE/es/locucions.dat
40
+ QuantitiesFile=$FREELINGSHARE/es/quantities.dat
41
+ AffixFile=$FREELINGSHARE/es/afixos.dat
42
+ ProbabilityFile=$FREELINGSHARE/es/probabilitats.dat
43
+ DictionaryFile=$FREELINGSHARE/es/dicc.src
44
+ PunctuationFile=$FREELINGSHARE/common/punct.dat
45
+ ProbabilityThreshold=0.001
46
+
47
+ # NER options
48
+ NERecognition=yes
49
+ NPDataFile=$FREELINGSHARE/es/np.dat
50
+ ## comment line above and uncomment that below, if you want
51
+ ## a better NE recognizer (higher accuracy, lower speed)
52
+ #NPDataFile=$FREELINGSHARE/es/ner/ner-ab.dat
53
+
54
+ #Spelling Corrector config file
55
+ CorrectorFile=$FREELINGSHARE/es/corrector/corrector.dat
56
+
57
+ ## Phonetic encoding of words.
58
+ Phonetics=no
59
+ PhoneticsFile=$FREELINGSHARE/es/phonetics.dat
60
+
61
+ ## NEC options
62
+ NEClassification=no
63
+ NECFile=$FREELINGSHARE/es/nec/nec-svm.dat
64
+
65
+ ## Sense annotation options (none,all,mfs,ukb)
66
+ SenseAnnotation=none
67
+ SenseConfigFile=$FREELINGSHARE/es/senses.dat
68
+ UKBConfigFile=$FREELINGSHARE/es/ukb.dat
69
+
70
+ #### Tagger options
71
+ Tagger=hmm
72
+ TaggerHMMFile=$FREELINGSHARE/es/tagger.dat
73
+ TaggerRelaxFile=$FREELINGSHARE/es/constr_gram.dat
74
+ TaggerRelaxMaxIter=500
75
+ TaggerRelaxScaleFactor=670.0
76
+ TaggerRelaxEpsilon=0.001
77
+ TaggerRetokenize=yes
78
+ TaggerForceSelect=tagger
79
+
80
+ #### Parser options
81
+ GrammarFile=$FREELINGSHARE/es/grammar-dep.dat
82
+
83
+ #### Dependence Parser options
84
+ DepTxalaFile=$FREELINGSHARE/es/dep/dependences.dat
85
+
86
+ #### Coreference Solver options
87
+ CoreferenceResolution=no
88
+ CorefFile=$FREELINGSHARE/es/coref/coref.dat
89
+
@@ -0,0 +1,27 @@
1
+ # coding: utf-8
2
+
3
+ lib = File.expand_path("../lib", __FILE__)
4
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
+ require "freeling_client/version"
6
+
7
+
8
+ Gem::Specification.new do |s|
9
+ s.name = "freeling-client"
10
+ s.version = FreelingClient::VERSION
11
+ s.date = "2014-12-10"
12
+ s.summary = "Freeling client wrapper"
13
+ s.description = "Freeling client wrapper with API"
14
+ s.authors = ["Marcos Vanetta"]
15
+ s.email = "marcosvanetta@gmail.com"
16
+ s.homepage = "http://codingnews.info"
17
+ s.license = "MIT"
18
+
19
+ s.files = `git ls-files -z`.split("\x0")
20
+ s.test_files = s.files.grep(%r{^(test|spec|features)/})
21
+ s.require_paths = ["lib"]
22
+
23
+ s.add_dependency "hashie", "~> 3"
24
+ s.add_development_dependency "bundler", "~> 1.7"
25
+ s.add_development_dependency "rake", "~> 10.3"
26
+ s.add_development_dependency "minitest", "~> 5"
27
+ end
@@ -0,0 +1,80 @@
1
+ require "open3"
2
+ require "tempfile"
3
+ require "timeout"
4
+ require "hashie/mash"
5
+ require "freeling_client/base"
6
+
7
+
8
+ module FreelingClient
9
+ class Analyzer < Base
10
+
11
+ def initialize(opt={})
12
+ @config = opt.fetch(:config, 'config/freeling/analyzer.cfg')
13
+ @port = opt[:port]
14
+ @server = opt[:server]
15
+ @timeout = opt.fetch(:timeout, 60) # seconds (passed to Timeout::timeout)
16
+ end
17
+
18
+ def call(cmd, text)
19
+ valide_command!(cmd)
20
+
21
+ output = []
22
+ file = Tempfile.new('foo', encoding: 'utf-8')
23
+ begin
24
+ file.write(text)
25
+ file.close
26
+ stdin, stdout, stderr = Open3.popen3(command(cmd, file.path))
27
+ Timeout::timeout(@timeout) {
28
+ until (line = stdout.gets).nil?
29
+ output << line.chomp
30
+ end
31
+
32
+ message = stderr.readlines
33
+ unless message.empty?
34
+ raise ExtractionError, message.join("\n")
35
+ end
36
+ }
37
+ rescue Timeout::Error
38
+ raise ExtractionError, "Timeout"
39
+ ensure
40
+ file.close
41
+ file.unlink
42
+ end
43
+ output
44
+ end
45
+
46
+ def command(cmd, file_path)
47
+ self.send("command_#{cmd}", file_path)
48
+ end
49
+
50
+ def command_morfo(file_path)
51
+ "#{freeling_share} #{freeling_bin} -f #{config} --inpf plain --outf morfo < #{file_path}"
52
+ end
53
+
54
+ def command_tagged(file_path)
55
+ "#{freeling_share} #{freeling_bin} -f #{config} --inpf plain --outf tagged < #{file_path}"
56
+ end
57
+
58
+ def command_tagged_nec(file_path)
59
+ "#{freeling_share} #{freeling_bin} -f #{config} --inpf plain --outf tagged --nec --noflush < #{file_path}"
60
+ end
61
+
62
+ def command_tagged_sense(file_path)
63
+ "#{freeling_share} #{freeling_bin} -f #{config} --inpf plain --outf sense --sense all < #{file_path}"
64
+ end
65
+
66
+ def freeling_share
67
+ "FREELINGSHARE=/usr/local/share/freeling/"
68
+ end
69
+
70
+ def freeling_bin
71
+ "/usr/local/bin/analyzer"
72
+ end
73
+
74
+ def valide_command!(cmd)
75
+ unless [:morfo, :tagged, :tagged_nec, :tagged_sense].include?(cmd)
76
+ raise CommandError, "#{cmd} does not exist"
77
+ end
78
+ end
79
+ end
80
+ end
@@ -0,0 +1,9 @@
1
+ module FreelingClient
2
+ class Base
3
+ attr_reader :config, :ident, :server, :port
4
+
5
+ class ExtractionError < StandardError; end
6
+ class CommandError < StandardError; end
7
+
8
+ end
9
+ end
@@ -0,0 +1,45 @@
1
+ require "open3"
2
+ require "freeling_client/base"
3
+
4
+
5
+ module FreelingClient
6
+ class Client < Base
7
+ def initialize(opt = {})
8
+ @server = opt.fetch(:server, 'localhost')
9
+ @port = opt.fetch(:port, 50005)
10
+ @timeout = opt.fetch(:timeout, 120)
11
+ end
12
+
13
+ def call(text)
14
+ output = []
15
+ file = Tempfile.new('foo', encoding: 'utf-8')
16
+
17
+ begin
18
+ file.write(text)
19
+ file.close
20
+ stdin, stdout, stderr = Open3.popen3(command(file.path))
21
+
22
+ Timeout::timeout(@timeout) {
23
+ until (line = stdout.gets).nil?
24
+ output << line.chomp
25
+ end
26
+
27
+ message = stderr.readlines
28
+ unless message.empty?
29
+ raise ExtractionError, message.join("\n")
30
+ end
31
+ }
32
+ rescue Timeout::Error
33
+ raise ExtractionError, "Timeout"
34
+ ensure
35
+ file.close
36
+ file.unlink
37
+ end
38
+ output
39
+ end
40
+
41
+ def command(file_path)
42
+ "/usr/local/bin/analyzer_client #{server}:#{port} < #{file_path}"
43
+ end
44
+ end
45
+ end
@@ -0,0 +1,45 @@
1
+ require "open3"
2
+ require "freeling_client/base"
3
+
4
+
5
+ module FreelingClient
6
+ class LanguageDetector < Base
7
+ def initialize(opt = {})
8
+ @config = opt.fetch(:config, 'config/freeling/analyzer.cfg')
9
+ @ident = opt.fetch(:ident, '/usr/local/share/freeling/common/lang_ident/ident.dat')
10
+ @timeout = opt.fetch(:timeout, 120)
11
+ end
12
+
13
+ def detect(text)
14
+ output = []
15
+ file = Tempfile.new('foo', encoding: 'utf-8')
16
+
17
+ begin
18
+ file.write(text)
19
+ file.close
20
+ stdin, stdout, stderr = Open3.popen3(command(file.path))
21
+
22
+ Timeout::timeout(@timeout) {
23
+ until (line = stdout.gets).nil?
24
+ output << line.chomp
25
+ end
26
+
27
+ message = stderr.readlines
28
+ unless message.empty?
29
+ raise ExtractionError, message.join("\n")
30
+ end
31
+ }
32
+ rescue Timeout::Error
33
+ raise ExtractionError, "Timeout"
34
+ ensure
35
+ file.close
36
+ file.unlink
37
+ end
38
+ output[0].to_sym
39
+ end
40
+
41
+ def command(file_path)
42
+ "/usr/local/bin/analyzer --outf ident --fidn #{ident} -f #{config} < #{file_path}"
43
+ end
44
+ end
45
+ end
@@ -0,0 +1,3 @@
1
+ module FreelingClient
2
+ VERSION = "0.0.1"
3
+ end
@@ -0,0 +1,8 @@
1
+ require "tempfile"
2
+ require "freeling_client/version"
3
+ require "freeling_client/analyzer"
4
+ require "freeling_client/client"
5
+ require "freeling_client/language_detector"
6
+
7
+
8
+ module FreelingClient; end
data/start-freeling.sh ADDED
@@ -0,0 +1,5 @@
1
+ FREELINGSHARE=/usr/local/share/freeling/ analyzer \
2
+ -f config/freeling/analyzer.cfg \
3
+ --server --port 50005 \
4
+ --inpf plain \
5
+ --outf morfo
@@ -0,0 +1,23 @@
1
+ require 'test_helper'
2
+
3
+
4
+ describe FreelingClient do
5
+ it 'detects language' do
6
+ text = "El gato come pescado. Pero a Don Jaime no le gustan los gatos."
7
+ lang_detector = FreelingClient::LanguageDetector.new
8
+ lang_detector.detect(text).must_equal :es
9
+ end
10
+
11
+ it 'Uses freeling as a client (mode server)' do
12
+ text = "El gato come pescado. Pero a Don Jaime no le gustan los gatos."
13
+ freeling_client = FreelingClient::Client.new
14
+ freeling_client.call(text)[0].must_equal "El el DA0MS0 1"
15
+ end
16
+
17
+ it 'Uses freeling to get a morphological analysis' do
18
+ text = "El gato come pescado. Pero a Don Jaime no le gustan los gatos."
19
+ analyzer = FreelingClient::Analyzer.new
20
+
21
+ analyzer.call(:morfo, text)[0].must_equal "El el DA0MS0 1"
22
+ end
23
+ end
@@ -0,0 +1,5 @@
1
+ require 'minitest/autorun'
2
+ require 'minitest/unit'
3
+ require 'mocha/mini_test'
4
+
5
+ require 'freeling_client'
metadata ADDED
@@ -0,0 +1,116 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: freeling-client
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Marcos Vanetta
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-12-10 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: hashie
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '3'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.7'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.7'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '10.3'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '10.3'
55
+ - !ruby/object:Gem::Dependency
56
+ name: minitest
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '5'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '5'
69
+ description: Freeling client wrapper with API
70
+ email: marcosvanetta@gmail.com
71
+ executables: []
72
+ extensions: []
73
+ extra_rdoc_files: []
74
+ files:
75
+ - Gemfile
76
+ - Gemfile.lock
77
+ - README.md
78
+ - Rakefile
79
+ - config/freeling/analyzer.cfg
80
+ - freeling-client.gemspec
81
+ - lib/freeling_client.rb
82
+ - lib/freeling_client/analyzer.rb
83
+ - lib/freeling_client/base.rb
84
+ - lib/freeling_client/client.rb
85
+ - lib/freeling_client/language_detector.rb
86
+ - lib/freeling_client/version.rb
87
+ - start-freeling.sh
88
+ - test/lib/freeling_client_test.rb
89
+ - test/test_helper.rb
90
+ homepage: http://codingnews.info
91
+ licenses:
92
+ - MIT
93
+ metadata: {}
94
+ post_install_message:
95
+ rdoc_options: []
96
+ require_paths:
97
+ - lib
98
+ required_ruby_version: !ruby/object:Gem::Requirement
99
+ requirements:
100
+ - - ">="
101
+ - !ruby/object:Gem::Version
102
+ version: '0'
103
+ required_rubygems_version: !ruby/object:Gem::Requirement
104
+ requirements:
105
+ - - ">="
106
+ - !ruby/object:Gem::Version
107
+ version: '0'
108
+ requirements: []
109
+ rubyforge_project:
110
+ rubygems_version: 2.2.2
111
+ signing_key:
112
+ specification_version: 4
113
+ summary: Freeling client wrapper
114
+ test_files:
115
+ - test/lib/freeling_client_test.rb
116
+ - test/test_helper.rb