text_razor 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in text_razor.gemspec
4
+ gemspec
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Tomer Elmalem
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,42 @@
1
+ # TextRazor
2
+
3
+ The TextRazor gem implements the TextRazor API for easy natural language processing of unstructured text. For more info about the API, visit their website at textrazor.com.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'text_razor'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install text_razor
18
+
19
+ ## Usage
20
+
21
+ Usage is pretty simple but pretty limited at the moment as not all features of the API are supported.
22
+
23
+ Start by initializing a new ``TextRazor`` object like this:
24
+
25
+ ```
26
+ tr = TextRazor.new(api_key: '123456', text: 'This should be a huge body of text that you really really really want to process', extractors: 'entities')
27
+
28
+ ```
29
+
30
+ Note that you are passing in a hash of three things: your API key from textrazor.com, the text you want to process, and the extractors you want to use. At the moment only entities, topics, coarse topics, and words are supported.
31
+
32
+ Once you initialize your object, call ``#process`` on it to get your data from the API:
33
+
34
+ ```
35
+ tr.process # calls the api
36
+ tr.entities # returns all of the entities
37
+ tr.topics # returns all of the topics
38
+ tr.coarse_topics # returns all of the coarse topics
39
+ tr.words # returns all of the words
40
+ ```
41
+
42
+ For more information on the data getting returned for those extractors, visit the documentation at http://www.textrazor.com/documentation_rest
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,55 @@
1
+ require 'json'
2
+ require 'rest-client'
3
+ require 'text_razor/version'
4
+ require 'text_razor/entity'
5
+ require 'text_razor/topic'
6
+ require 'text_razor/coarse_topic'
7
+ require 'text_razor/sentence'
8
+ require 'text_razor/word'
9
+
10
# Client for the TextRazor REST API.
#
# Stores the request parameters, posts them with RestClient and wraps the
# sections of the parsed JSON response in Entity, Topic, CoarseTopic and
# Sentence objects.
class TextRazor
  attr_reader :entities, :topics, :coarse_topics, :sentences

  # NOTE(review): plain HTTP sends the API key unencrypted -- confirm whether
  # the service offers an HTTPS endpoint and switch to it if so.
  URL = 'http://api.textrazor.com'

  # @param options [Hash] request settings, read from the symbol keys
  #   :api_key, :text and :extractors
  def initialize(options={})
    @api_key = options[:api_key]
    @text = options[:text]
    @extractors = options[:extractors]
    @json = nil
  end

  # Posts the stored text to the API and rebuilds every result collection
  # from the response.
  #
  # @return [Array<Sentence>, nil] the sentences built from the response
  def process
    response = RestClient.post(URL, {apiKey: @api_key, text: @text, extractors: @extractors})
    # A payload without a 'response' key is treated as an empty result
    # instead of failing with NoMethodError on nil below.
    @json = JSON.parse(response)['response'] || {}

    create_entities(@json['entities'])
    create_topics(@json['topics'])
    create_coarse_topics(@json['coarseTopics'])
    create_sentences(@json['sentences'])
  end

  # @return [Object, nil] the response's 'language' value; nil before #process
  def language
    @json && @json['language']
  end

  # @return [Object, nil] the response's 'languageIsReliable' value; nil
  #   before #process
  def is_reliable?
    @json && @json['languageIsReliable']
  end

  # @return [Array, nil] the words of every sentence in order, or nil when no
  #   sentences are available
  def words
    @sentences.flat_map(&:words) if @sentences
  end

  # The create_* methods assign unconditionally, so a section missing from the
  # latest response resets the reader to nil rather than keeping old results.

  # @param entities [Array<Hash>, nil] raw entity hashes
  def create_entities(entities)
    @entities = entities && entities.map { |entity| Entity.new(entity) }
  end

  # @param topics [Array<Hash>, nil] raw topic hashes
  def create_topics(topics)
    @topics = topics && topics.map { |topic| Topic.new(topic) }
  end

  # @param coarse_topics [Array<Hash>, nil] raw coarse topic hashes
  def create_coarse_topics(coarse_topics)
    @coarse_topics = coarse_topics && coarse_topics.map { |coarse_topic| CoarseTopic.new(coarse_topic) }
  end

  # @param sentences [Array<Hash>, nil] raw sentence hashes
  def create_sentences(sentences)
    @sentences = sentences && sentences.map { |sentence| Sentence.new(sentence) }
  end
end
@@ -0,0 +1,11 @@
1
class TextRazor
  # One coarse topic from an API response, built from its raw string-keyed hash.
  class CoarseTopic
    attr_reader :label, :score, :wiki_link

    # @param options [Hash] raw coarse topic hash with string keys
    def initialize(options={})
      @label = options['label']
      @score = options['score']
      # Topic payloads carry the link under the camelCase key 'wikiLink';
      # the snake_case key is still honoured for existing callers.
      @wiki_link = options.fetch('wikiLink') { options['wiki_link'] }
    end
  end
end
@@ -0,0 +1,21 @@
1
class TextRazor
  # One entity from an API response, built from its raw string-keyed hash.
  # Every reader returns nil when its key is absent.
  class Entity
    attr_reader :confidence_score, :ending_pos, :entity_english_id, :entity_id,
                :freebase_id, :freebase_types, :matching_text, :matching_tokens,
                :relevance_score, :starting_pos, :wiki_link

    # @param options [Hash] raw entity hash with camelCase string keys
    def initialize(options={})
      @confidence_score = options['confidenceScore']
      @ending_pos = options['endingPos']
      @entity_english_id = options['entityEnglishId']
      @entity_id = options['entityId']
      @freebase_id = options['freebaseId']
      @freebase_types = options['freebaseTypes']
      # Entity payloads name the matched span 'matchedText'; 'matchingText'
      # is kept as a fallback for existing callers.
      @matching_text = options.fetch('matchedText') { options['matchingText'] }
      @matching_tokens = options['matchingTokens']
      @relevance_score = options['relevanceScore']
      @starting_pos = options['startingPos']
      @wiki_link = options['wikiLink']
    end
  end
end
@@ -0,0 +1,10 @@
1
class TextRazor
  # One sentence from an API response; wraps each raw word hash in a Word.
  class Sentence
    attr_reader :position, :words

    # @param options [Hash] raw sentence hash with the string keys
    #   'position' and 'words'
    def initialize(options={})
      @position = options['position']
      # A missing 'words' key (including the default empty hash) yields an
      # empty list instead of raising NoMethodError on nil.
      @words = (options['words'] || []).map { |word| Word.new(word) }
    end
  end
end
@@ -0,0 +1,11 @@
1
class TextRazor
  # One topic from an API response, built from its raw string-keyed hash.
  class Topic
    attr_reader :label, :score, :wiki_link

    # @param options [Hash] raw topic hash with string keys
    def initialize(options={})
      @label = options['label']
      @score = options['score']
      # Topic payloads carry the link under the camelCase key 'wikiLink';
      # the snake_case key is still honoured for existing callers.
      @wiki_link = options.fetch('wikiLink') { options['wiki_link'] }
    end
  end
end
@@ -0,0 +1,3 @@
1
class TextRazor
  # Gem version string; frozen so the shared constant cannot be mutated in place.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,18 @@
1
class TextRazor
  # One word (token) of a sentence, built from its raw string-keyed hash.
  # Every reader returns nil when its key is absent.
  class Word
    attr_reader :parent_position, :relation_to_parent, :position, :stem, :lemma,
                :token, :part_of_speech, :starting_pos, :ending_pos

    # @param options [Hash] raw word hash with camelCase string keys
    def initialize(options={})
      @parent_position = options['parentPosition']
      @relation_to_parent = options['relationToParent']
      @position = options['position']
      @stem = options['stem']
      @lemma = options['lemma']
      @token = options['token']
      @part_of_speech = options['partOfSpeech']
      # Offsets arrive under camelCase keys like every other field here;
      # the snake_case keys are still honoured for existing callers.
      @starting_pos = options.fetch('startingPos') { options['starting_pos'] }
      @ending_pos = options.fetch('endingPos') { options['ending_pos'] }
    end
  end
end
@@ -0,0 +1,12 @@
1
+ require 'text_razor/sentence'
2
+
3
# Checks that a Sentence turns each raw word hash into a TextRazor::Word.
describe TextRazor::Sentence do
  it 'creates words when initializing a new sentence' do
    first_word = {"position"=>0, "startingPos"=>0, "endingPos"=>3, "stem"=>"see", "lemma"=>"see", "token"=>"See", "partOfSpeech"=>"VB"}
    second_word = {"position"=>1, "startingPos"=>3, "endingPos"=>4, "stem"=>"?", "lemma"=>"?", "token"=>"?", "partOfSpeech"=>"." }
    payload = {"position" => 1, "words" => [first_word, second_word]}

    built_words = TextRazor::Sentence.new(payload).words

    built_words.first.class.should eql TextRazor::Word
    built_words.count.should eql 2
  end
end
@@ -0,0 +1,44 @@
1
+ require 'text_razor'
2
+
3
# Exercises TextRazor#process against a stubbed HTTP call and checks that each
# create_* method wraps raw hashes in the matching result class.
describe TextRazor do
  before do
    @textrazor = TextRazor.new(api_key: 'abc123', text: 'This is a test sentence', extractors: 'entities')
  end

  it 'does not raise an error if the response is empty' do
    empty_body = {'response' => {}}.to_json
    RestClient.stub(:post).and_return(empty_body)

    expect { @textrazor.process }.to_not raise_error
  end

  it 'sets and creates entities' do
    raw_entities = [
      {"id"=>0, "type"=>["WrittenWork"], "matchingTokens"=>[5], "entityId"=>"The New York Times", "confidenceScore"=>2.43203, "matchedText"=>"the new york times"},
      {"id"=>1, "type"=>["Person"], "matchingTokens"=>[15], "entityId"=>"Nick Bilton", "confidenceScore"=>0.5, "matchedText"=>"Nick Bilton"}
    ]

    @textrazor.create_entities(raw_entities)

    @textrazor.entities.first.class.should eql TextRazor::Entity
    @textrazor.entities.count.should eql 2
  end

  it 'sets and creates topics' do
    raw_topics = [
      {"id"=>0, "label"=>"Advertising", "wikiLink"=>"http://en.wikipedia.org/Advertising", "score"=>0.426667},
      {"id"=>1, "label"=>"Algorithm", "wikiLink"=>"http://en.wikipedia.org/Algorithm", "score"=>0.506667}
    ]

    @textrazor.create_topics(raw_topics)

    @textrazor.topics.first.class.should eql TextRazor::Topic
    @textrazor.topics.count.should eql 2
  end

  it 'sets and creates coarse topics' do
    raw_coarse_topics = [
      {"id"=>0, "label"=>"Advertising", "wikiLink"=>"http://en.wikipedia.org/Advertising", "score"=>0.426667},
      {"id"=>1, "label"=>"Algorithm", "wikiLink"=>"http://en.wikipedia.org/Algorithm", "score"=>0.506667}
    ]

    @textrazor.create_coarse_topics(raw_coarse_topics)

    @textrazor.coarse_topics.first.class.should eql TextRazor::CoarseTopic
    @textrazor.coarse_topics.count.should eql 2
  end

  it 'sets and creates sentences' do
    raw_sentences = [
      {"position"=>1, "words"=> [{},{}]},
      {"position"=>2, "words"=> [{},{}]}
    ]

    @textrazor.create_sentences(raw_sentences)

    @textrazor.sentences.first.class.should eql TextRazor::Sentence
    @textrazor.sentences.count.should eql 2
  end
end
@@ -0,0 +1,23 @@
1
+ # -*- encoding: utf-8 -*-
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'text_razor/version'
5
+
6
# Packaging metadata for the text_razor gem.
Gem::Specification.new do |gem|
  gem.name          = "text_razor"
  gem.version       = TextRazor::VERSION
  gem.authors       = ["Tomer Elmalem"]
  gem.email         = ["telmalem@gmail.com"]
  gem.description   = %q{TextRazor API gem}
  gem.summary       = %q{An API wrapper for TextRazor's Natural Language Processing API}
  gem.homepage      = ""
  # Declares the license the package ships with (its license text is MIT),
  # so the published metadata no longer has an empty license list.
  gem.license       = "MIT"

  # The packaged file list is whatever git tracks, split on the record separator.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  gem.add_dependency "rest-client"
  gem.add_development_dependency "pry"
  gem.add_development_dependency "rspec", "~> 2.6"
end
metadata ADDED
@@ -0,0 +1,110 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: text_razor
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Tomer Elmalem
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-04-04 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rest-client
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: pry
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: rspec
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ~>
52
+ - !ruby/object:Gem::Version
53
+ version: '2.6'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: '2.6'
62
+ description: TextRazor API gem
63
+ email:
64
+ - telmalem@gmail.com
65
+ executables: []
66
+ extensions: []
67
+ extra_rdoc_files: []
68
+ files:
69
+ - .gitignore
70
+ - Gemfile
71
+ - LICENSE.txt
72
+ - README.md
73
+ - Rakefile
74
+ - lib/text_razor.rb
75
+ - lib/text_razor/coarse_topic.rb
76
+ - lib/text_razor/entity.rb
77
+ - lib/text_razor/sentence.rb
78
+ - lib/text_razor/topic.rb
79
+ - lib/text_razor/version.rb
80
+ - lib/text_razor/word.rb
81
+ - spec/sentences_spec.rb
82
+ - spec/text_razor_spec.rb
83
+ - text_razor.gemspec
84
+ homepage: ''
85
+ licenses: []
86
+ post_install_message:
87
+ rdoc_options: []
88
+ require_paths:
89
+ - lib
90
+ required_ruby_version: !ruby/object:Gem::Requirement
91
+ none: false
92
+ requirements:
93
+ - - ! '>='
94
+ - !ruby/object:Gem::Version
95
+ version: '0'
96
+ required_rubygems_version: !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ! '>='
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ requirements: []
103
+ rubyforge_project:
104
+ rubygems_version: 1.8.25
105
+ signing_key:
106
+ specification_version: 3
107
+ summary: An API wrapper for TextRazor's Natural Language Processing API
108
+ test_files:
109
+ - spec/sentences_spec.rb
110
+ - spec/text_razor_spec.rb