freeling-client 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: db5bc7707caee2de4e0609c5fe595ff9213c3f43
4
+ data.tar.gz: 090a06c682c062753199ec37b21eaeebc868315a
5
+ SHA512:
6
+ metadata.gz: 5fb75e56e9efaf8842271b502bfb6df06829cb3248959f3a712bf28d0b2f4724ff796218a24334b288b5851b2849e70f91011c3c3751052b5d3b589c4bcd6352
7
+ data.tar.gz: 841275c8fd541b0600229195f2fc4b82474b17c7d52a08e94a1003d6dfb860ea08f635c851c1b86612a3d6e6fd1cda288449e612301e55a73da04e11bca20486
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,21 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ freeling-client (0.0.1)
5
+ hashie
6
+
7
+ GEM
8
+ remote: https://rubygems.org/
9
+ specs:
10
+ hashie (3.3.2)
11
+ minitest (5.4.3)
12
+ rake (10.3.2)
13
+
14
+ PLATFORMS
15
+ ruby
16
+
17
+ DEPENDENCIES
18
+ bundler (~> 1.7)
19
+ freeling-client!
20
+ minitest
21
+ rake (~> 10.3)
data/README.md ADDED
@@ -0,0 +1,58 @@
1
+ # Freeling::Client
2
+
3
+ Simple client wrapper for the Freeling analyzer tool. If you need to install Freeling on Ubuntu 14.04, just follow [this guide](https://gist.github.com/malev/d6a8b51c2ae0a762ab1d).
4
+
5
+ ## Example of usage:
6
+
7
+ ```ruby
8
+ require 'freeling_client'
9
+
10
+ text = <<-EOF
11
+ Malcolm X was effectively orphaned early in life. His father was killed when he was six and his mother was placed in a mental hospital when he was thirteen, after which he lived in a series of foster homes.
12
+ EOF
13
+
14
+ options = {
15
+ timeout: 200,
16
+ config: 'config.cfg',
17
+   ident: 'ident.dat'
18
+ }
19
+
20
+ # Using the language detection tool
21
+ lang_detector = FreelingClient::LanguageDetector.new options
22
+ lang_detector.detect(text) # => :en
23
+
24
+ # Morphological, morpho with PoS tagging, tagged words and nec analysis
25
+ analyzer = FreelingClient::Analyzer.new options
26
+ analyzer.call(:morfo, text)
27
+ analyzer.call(:tagged, text)
28
+ analyzer.call(:tagged_sense, text)
29
+ analyzer.call(:tagged_nec, text)
30
+
31
+ # Using as a client
32
+ # You will need to set up the server first. See below.
33
+ options = {
34
+ server: 'localhost',
35
+ port: 50005
36
+ }
37
+
38
+ freeling_client = FreelingClient::Client.new options
39
+ freeling_client.call(text)
40
+ ```
41
+
42
+ ## Running Freeling as a server
43
+
44
+ Performing morphological analysis:
45
+
46
+ FREELINGSHARE=/usr/local/share/freeling/ analyzer -f config/freeling/analyzer.cfg --server --port 50005 --inpf plain --outf morfo
47
+
48
+ Performing morphological analysis with PoS tagging:
49
+
50
+ FREELINGSHARE=/usr/local/share/freeling/ analyzer -f config/freeling/analyzer.cfg --server --port 50005 --inpf plain --outf tagged
51
+
52
+ Asking for the senses of the tagged words:
53
+
54
+ FREELINGSHARE=/usr/local/share/freeling/ analyzer -f config/freeling/analyzer.cfg --server --port 50005 --inpf plain --outf sense --sense all
55
+
56
+ With `nec` analysis:
57
+
58
+ FREELINGSHARE=/usr/local/share/freeling/ analyzer -f config/freeling/analyzer.cfg --server --port 50005 --inpf plain --outf tagged --nec --noflush
data/Rakefile ADDED
@@ -0,0 +1,10 @@
1
+ require 'rake/testtask'
2
+
3
+ Rake::TestTask.new do |t|
4
+ t.libs << 'test'
5
+ t.test_files = FileList["test/**/*_test.rb"]
6
+ t.verbose = true
7
+ end
8
+
9
+ desc "Run tests"
10
+ task :default => :test
@@ -0,0 +1,89 @@
1
+ ##
2
+ #### default configuration file for Spanish analyzer
3
+ ##
4
+
5
+ TraceLevel=3
6
+ TraceModule=0x0000
7
+
8
+ ## Options to control the applied modules. The input may be partially
9
+ ## processed, or a full analysis may not be wanted. The specific
10
+ ## formats are a choice of the main program using the library, as well
11
+ ## as the responsibility of calling only the required modules.
12
+ ## Valid input formats are: plain, token, splitted, morfo, tagged, sense.
13
+ ## Valid output formats are: plain, token, splitted, morfo, tagged,
14
+ ## shallow, parsed, dep.
15
+ InputFormat=plain
16
+ OutputFormat=tagged
17
+
18
+ # consider each newline as a sentence end
19
+ AlwaysFlush=no
20
+
21
+ #### Tokenizer options
22
+ TokenizerFile=$FREELINGSHARE/es/tokenizer.dat
23
+
24
+ #### Splitter options
25
+ SplitterFile=$FREELINGSHARE/es/splitter.dat
26
+
27
+ #### Morfo options
28
+ AffixAnalysis=yes
29
+ MultiwordsDetection=yes
30
+ NumbersDetection=yes
31
+ PunctuationDetection=yes
32
+ DatesDetection=yes
33
+ QuantitiesDetection=yes
34
+ DictionarySearch=yes
35
+ ProbabilityAssignment=yes
36
+ OrthographicCorrection=no
37
+ DecimalPoint=,
38
+ ThousandPoint=.
39
+ LocutionsFile=$FREELINGSHARE/es/locucions.dat
40
+ QuantitiesFile=$FREELINGSHARE/es/quantities.dat
41
+ AffixFile=$FREELINGSHARE/es/afixos.dat
42
+ ProbabilityFile=$FREELINGSHARE/es/probabilitats.dat
43
+ DictionaryFile=$FREELINGSHARE/es/dicc.src
44
+ PunctuationFile=$FREELINGSHARE/common/punct.dat
45
+ ProbabilityThreshold=0.001
46
+
47
+ # NER options
48
+ NERecognition=yes
49
+ NPDataFile=$FREELINGSHARE/es/np.dat
50
+ ## comment line above and uncomment that below, if you want
51
+ ## a better NE recognizer (higher accuracy, lower speed)
52
+ #NPDataFile=$FREELINGSHARE/es/ner/ner-ab.dat
53
+
54
+ #Spelling Corrector config file
55
+ CorrectorFile=$FREELINGSHARE/es/corrector/corrector.dat
56
+
57
+ ## Phonetic encoding of words.
58
+ Phonetics=no
59
+ PhoneticsFile=$FREELINGSHARE/es/phonetics.dat
60
+
61
+ ## NEC options
62
+ NEClassification=no
63
+ NECFile=$FREELINGSHARE/es/nec/nec-svm.dat
64
+
65
+ ## Sense annotation options (none,all,mfs,ukb)
66
+ SenseAnnotation=none
67
+ SenseConfigFile=$FREELINGSHARE/es/senses.dat
68
+ UKBConfigFile=$FREELINGSHARE/es/ukb.dat
69
+
70
+ #### Tagger options
71
+ Tagger=hmm
72
+ TaggerHMMFile=$FREELINGSHARE/es/tagger.dat
73
+ TaggerRelaxFile=$FREELINGSHARE/es/constr_gram.dat
74
+ TaggerRelaxMaxIter=500
75
+ TaggerRelaxScaleFactor=670.0
76
+ TaggerRelaxEpsilon=0.001
77
+ TaggerRetokenize=yes
78
+ TaggerForceSelect=tagger
79
+
80
+ #### Parser options
81
+ GrammarFile=$FREELINGSHARE/es/grammar-dep.dat
82
+
83
+ #### Dependence Parser options
84
+ DepTxalaFile=$FREELINGSHARE/es/dep/dependences.dat
85
+
86
+ #### Coreference Solver options
87
+ CoreferenceResolution=no
88
+ CorefFile=$FREELINGSHARE/es/coref/coref.dat
89
+
@@ -0,0 +1,27 @@
1
+ # coding: utf-8
2
+
3
+ lib = File.expand_path("../lib", __FILE__)
4
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
+ require "freeling_client/version"
6
+
7
+
8
+ Gem::Specification.new do |s|
9
+ s.name = "freeling-client"
10
+ s.version = FreelingClient::VERSION
11
+ s.date = "2014-12-10"
12
+ s.summary = "Freeling client wrapper"
13
+ s.description = "Freeling client wrapper with API"
14
+ s.authors = ["Marcos Vanetta"]
15
+ s.email = "marcosvanetta@gmail.com"
16
+ s.homepage = "http://codingnews.info"
17
+ s.license = "MIT"
18
+
19
+ s.files = `git ls-files -z`.split("\x0")
20
+ s.test_files = s.files.grep(%r{^(test|spec|features)/})
21
+ s.require_paths = ["lib"]
22
+
23
+ s.add_dependency "hashie", "~> 3"
24
+ s.add_development_dependency "bundler", "~> 1.7"
25
+ s.add_development_dependency "rake", "~> 10.3"
26
+ s.add_development_dependency "minitest", "~> 5"
27
+ end
@@ -0,0 +1,80 @@
1
+ require "open3"
2
+ require "tempfile"
3
+ require "timeout"
4
+ require "hashie/mash"
5
+ require "freeling_client/base"
6
+
7
+
8
+ module FreelingClient
9
+ class Analyzer < Base
10
+
11
+     def initialize(opt={})
12
+       @config = opt.fetch(:config, 'config/freeling/analyzer.cfg')
13
+       @port = opt[:port]
14
+       @server = opt[:server]
15
+       @timeout = opt.fetch(:timeout, 60) # seconds; handed to Timeout::timeout in #call
16
+     end
17
+
18
+ def call(cmd, text)
19
+ valide_command!(cmd)
20
+
21
+ output = []
22
+ file = Tempfile.new('foo', encoding: 'utf-8')
23
+ begin
24
+ file.write(text)
25
+ file.close
26
+ stdin, stdout, stderr = Open3.popen3(command(cmd, file.path))
27
+ Timeout::timeout(@timeout) {
28
+ until (line = stdout.gets).nil?
29
+ output << line.chomp
30
+ end
31
+
32
+ message = stderr.readlines
33
+ unless message.empty?
34
+ raise ExtractionError, message.join("\n")
35
+ end
36
+ }
37
+ rescue Timeout::Error
38
+ raise ExtractionError, "Timeout"
39
+ ensure
40
+ file.close
41
+ file.unlink
42
+ end
43
+ output
44
+ end
45
+
46
+ def command(cmd, file_path)
47
+ self.send("command_#{cmd}", file_path)
48
+ end
49
+
50
+ def command_morfo(file_path)
51
+ "#{freeling_share} #{freeling_bin} -f #{config} --inpf plain --outf morfo < #{file_path}"
52
+ end
53
+
54
+ def command_tagged(file_path)
55
+ "#{freeling_share} #{freeling_bin} -f #{config} --inpf plain --outf tagged < #{file_path}"
56
+ end
57
+
58
+ def command_tagged_nec(file_path)
59
+ "#{freeling_share} #{freeling_bin} -f #{config} --inpf plain --outf tagged --nec --noflush < #{file_path}"
60
+ end
61
+
62
+ def command_tagged_sense(file_path)
63
+ "#{freeling_share} #{freeling_bin} -f #{config} --inpf plain --outf sense --sense all < #{file_path}"
64
+ end
65
+
66
+ def freeling_share
67
+ "FREELINGSHARE=/usr/local/share/freeling/"
68
+ end
69
+
70
+ def freeling_bin
71
+ "/usr/local/bin/analyzer"
72
+ end
73
+
74
+ def valide_command!(cmd)
75
+ unless [:morfo, :tagged, :tagged_nec, :tagged_sense].include?(cmd)
76
+ raise CommandError, "#{cmd} does not exist"
77
+ end
78
+ end
79
+ end
80
+ end
@@ -0,0 +1,9 @@
1
+ module FreelingClient
2
+ class Base
3
+ attr_reader :config, :ident, :server, :port
4
+
5
+ class ExtractionError < StandardError; end
6
+ class CommandError < StandardError; end
7
+
8
+ end
9
+ end
@@ -0,0 +1,45 @@
1
+ require "open3"
2
+ require "freeling_client/base"
3
+
4
+
5
+ module FreelingClient
6
+ class Client < Base
7
+ def initialize(opt = {})
8
+ @server = opt.fetch(:server, 'localhost')
9
+ @port = opt.fetch(:port, 50005)
10
+ @timeout = opt.fetch(:timeout, 120)
11
+ end
12
+
13
+ def call(text)
14
+ output = []
15
+ file = Tempfile.new('foo', encoding: 'utf-8')
16
+
17
+ begin
18
+ file.write(text)
19
+ file.close
20
+ stdin, stdout, stderr = Open3.popen3(command(file.path))
21
+
22
+ Timeout::timeout(@timeout) {
23
+ until (line = stdout.gets).nil?
24
+ output << line.chomp
25
+ end
26
+
27
+ message = stderr.readlines
28
+ unless message.empty?
29
+ raise ExtractionError, message.join("\n")
30
+ end
31
+ }
32
+ rescue Timeout::Error
33
+ raise ExtractionError, "Timeout"
34
+ ensure
35
+ file.close
36
+ file.unlink
37
+ end
38
+ output
39
+ end
40
+
41
+ def command(file_path)
42
+ "/usr/local/bin/analyzer_client #{server}:#{port} < #{file_path}"
43
+ end
44
+ end
45
+ end
@@ -0,0 +1,45 @@
1
+ require "open3"
2
+ require "freeling_client/base"
3
+
4
+
5
+ module FreelingClient
6
+ class LanguageDetector < Base
7
+ def initialize(opt = {})
8
+ @config = opt.fetch(:config, 'config/freeling/analyzer.cfg')
9
+ @ident = opt.fetch(:ident, '/usr/local/share/freeling/common/lang_ident/ident.dat')
10
+ @timeout = opt.fetch(:timeout, 120)
11
+ end
12
+
13
+ def detect(text)
14
+ output = []
15
+ file = Tempfile.new('foo', encoding: 'utf-8')
16
+
17
+ begin
18
+ file.write(text)
19
+ file.close
20
+ stdin, stdout, stderr = Open3.popen3(command(file.path))
21
+
22
+ Timeout::timeout(@timeout) {
23
+ until (line = stdout.gets).nil?
24
+ output << line.chomp
25
+ end
26
+
27
+ message = stderr.readlines
28
+ unless message.empty?
29
+ raise ExtractionError, message.join("\n")
30
+ end
31
+ }
32
+ rescue Timeout::Error
33
+ raise ExtractionError, "Timeout"
34
+ ensure
35
+ file.close
36
+ file.unlink
37
+ end
38
+ output[0].to_sym
39
+ end
40
+
41
+ def command(file_path)
42
+ "/usr/local/bin/analyzer --outf ident --fidn #{ident} -f #{config} < #{file_path}"
43
+ end
44
+ end
45
+ end
@@ -0,0 +1,3 @@
1
+ module FreelingClient
2
+ VERSION = "0.0.1"
3
+ end
@@ -0,0 +1,8 @@
1
+ require "tempfile"
2
+ require "freeling_client/version"
3
+ require "freeling_client/analyzer"
4
+ require "freeling_client/client"
5
+ require "freeling_client/language_detector"
6
+
7
+
8
+ module FreelingClient; end
data/start-freeling.sh ADDED
@@ -0,0 +1,5 @@
1
+ FREELINGSHARE=/usr/local/share/freeling/ analyzer \
2
+ -f config/freeling/analyzer.cfg \
3
+ --server --port 50005 \
4
+ --inpf plain \
5
+ --outf morfo
@@ -0,0 +1,23 @@
1
+ require 'test_helper'
2
+
3
+
4
+ describe FreelingClient do
5
+ it 'detects language' do
6
+ text = "El gato come pescado. Pero a Don Jaime no le gustan los gatos."
7
+ lang_detector = FreelingClient::LanguageDetector.new
8
+ lang_detector.detect(text).must_equal :es
9
+ end
10
+
11
+ it 'Uses freeling as a client (mode server)' do
12
+ text = "El gato come pescado. Pero a Don Jaime no le gustan los gatos."
13
+ freeling_client = FreelingClient::Client.new
14
+ freeling_client.call(text)[0].must_equal "El el DA0MS0 1"
15
+ end
16
+
17
+ it 'Uses freeling to get a morphological analysis' do
18
+ text = "El gato come pescado. Pero a Don Jaime no le gustan los gatos."
19
+ analyzer = FreelingClient::Analyzer.new
20
+
21
+ analyzer.call(:morfo, text)[0].must_equal "El el DA0MS0 1"
22
+ end
23
+ end
@@ -0,0 +1,5 @@
1
+ require 'minitest/autorun'
2
+ require 'minitest/unit'
3
+ require 'mocha/mini_test'
4
+
5
+ require 'freeling_client'
metadata ADDED
@@ -0,0 +1,116 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: freeling-client
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Marcos Vanetta
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-12-10 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: hashie
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '3'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.7'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.7'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '10.3'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '10.3'
55
+ - !ruby/object:Gem::Dependency
56
+ name: minitest
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '5'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '5'
69
+ description: Freeling client wrapper with API
70
+ email: marcosvanetta@gmail.com
71
+ executables: []
72
+ extensions: []
73
+ extra_rdoc_files: []
74
+ files:
75
+ - Gemfile
76
+ - Gemfile.lock
77
+ - README.md
78
+ - Rakefile
79
+ - config/freeling/analyzer.cfg
80
+ - freeling-client.gemspec
81
+ - lib/freeling_client.rb
82
+ - lib/freeling_client/analyzer.rb
83
+ - lib/freeling_client/base.rb
84
+ - lib/freeling_client/client.rb
85
+ - lib/freeling_client/language_detector.rb
86
+ - lib/freeling_client/version.rb
87
+ - start-freeling.sh
88
+ - test/lib/freeling_client_test.rb
89
+ - test/test_helper.rb
90
+ homepage: http://codingnews.info
91
+ licenses:
92
+ - MIT
93
+ metadata: {}
94
+ post_install_message:
95
+ rdoc_options: []
96
+ require_paths:
97
+ - lib
98
+ required_ruby_version: !ruby/object:Gem::Requirement
99
+ requirements:
100
+ - - ">="
101
+ - !ruby/object:Gem::Version
102
+ version: '0'
103
+ required_rubygems_version: !ruby/object:Gem::Requirement
104
+ requirements:
105
+ - - ">="
106
+ - !ruby/object:Gem::Version
107
+ version: '0'
108
+ requirements: []
109
+ rubyforge_project:
110
+ rubygems_version: 2.2.2
111
+ signing_key:
112
+ specification_version: 4
113
+ summary: Freeling client wrapper
114
+ test_files:
115
+ - test/lib/freeling_client_test.rb
116
+ - test/test_helper.rb