text_razor 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +17 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +42 -0
- data/Rakefile +1 -0
- data/lib/text_razor.rb +55 -0
- data/lib/text_razor/coarse_topic.rb +11 -0
- data/lib/text_razor/entity.rb +21 -0
- data/lib/text_razor/sentence.rb +10 -0
- data/lib/text_razor/topic.rb +11 -0
- data/lib/text_razor/version.rb +3 -0
- data/lib/text_razor/word.rb +18 -0
- data/spec/sentences_spec.rb +12 -0
- data/spec/text_razor_spec.rb +44 -0
- data/text_razor.gemspec +23 -0
- metadata +110 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2013 Tomer Elmalem
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
# TextRazor
|
2
|
+
|
3
|
+
The TextRazor gem implements the TextRazor API for easy natural language processing of unstructured text. For more info about the API, visit their website at textrazor.com.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
gem 'text_razor'
|
10
|
+
|
11
|
+
And then execute:
|
12
|
+
|
13
|
+
$ bundle
|
14
|
+
|
15
|
+
Or install it yourself as:
|
16
|
+
|
17
|
+
$ gem install text_razor
|
18
|
+
|
19
|
+
## Usage
|
20
|
+
|
21
|
+
Usage is pretty simple but pretty limited at the moment as not all features of the API are supported.
|
22
|
+
|
23
|
+
Start by initializing a new ``TextRazor`` object like this:
|
24
|
+
|
25
|
+
```
|
26
|
+
tr = TextRazor.new(api_key: '123456', text: 'This should be a huge body of text that you really really really want to process', extractors: 'entities')
|
27
|
+
|
28
|
+
```
|
29
|
+
|
30
|
+
Note that you are passing in a hash of three things: your API key from textrazor.com, the text you want to process, and the extractors you want to use. At the moment only entities, topics, coarse topics, and words are supported.
|
31
|
+
|
32
|
+
Once you initialize your object, call ``#process`` on it to get your data from the API:
|
33
|
+
|
34
|
+
```
|
35
|
+
tr.process # calls the api
|
36
|
+
tr.entities # returns all of the entities
|
37
|
+
tr.topics # returns all of the topics
|
38
|
+
tr.coarse_topics # returns all of the coarse topics
|
39
|
+
tr.words # returns all of the words
|
40
|
+
```
|
41
|
+
|
42
|
+
For more information on the data returned by those extractors, visit the documentation at http://www.textrazor.com/documentation_rest
|
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
data/lib/text_razor.rb
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'rest-client'
|
3
|
+
require 'text_razor/version'
|
4
|
+
require 'text_razor/entity'
|
5
|
+
require 'text_razor/topic'
|
6
|
+
require 'text_razor/coarse_topic'
|
7
|
+
require 'text_razor/sentence'
|
8
|
+
require 'text_razor/word'
|
9
|
+
|
10
|
+
# Client for the TextRazor REST API.
#
# Stores the request parameters given at construction, posts them with
# #process, and exposes the parsed pieces of the response through readers.
# Every reader returns nil until #process has been called (or until the
# matching create_* method has been given data).
class TextRazor
  attr_reader :entities, :topics, :coarse_topics, :sentences

  # NOTE(review): plain HTTP means the API key posted by #process travels
  # unencrypted. Confirm whether the service offers an https endpoint and
  # switch to it if so.
  URL = 'http://api.textrazor.com'

  # @param options [Hash] request settings
  # @option options [Object] :api_key    posted as +apiKey+
  # @option options [Object] :text       posted as +text+
  # @option options [Object] :extractors posted as +extractors+
  def initialize(options={})
    @api_key    = options[:api_key]
    @text       = options[:text]
    @extractors = options[:extractors]
    @json       = nil
  end

  # Posts the stored parameters to URL, parses the body as JSON, keeps its
  # 'response' member, and builds the entity/topic/coarse-topic/sentence
  # objects from it. Sections missing from the response leave the
  # corresponding reader untouched.
  def process
    response = RestClient.post URL, {apiKey: @api_key, text: @text, extractors: @extractors}
    @json = JSON.parse(response)['response']

    create_entities(@json['entities'])
    create_topics(@json['topics'])
    create_coarse_topics(@json['coarseTopics'])
    create_sentences(@json['sentences'])
  end

  # @return the 'language' member of the last response, or nil when
  #   #process has not run yet.
  def language
    @json && @json['language']
  end

  # @return the 'languageIsReliable' member of the last response, or nil
  #   when #process has not run yet.
  def is_reliable?
    @json && @json['languageIsReliable']
  end

  # @return [Array, nil] the words of every sentence, in sentence order, or
  #   nil when no sentences have been created. A sentence whose #words is
  #   nil contributes nothing.
  def words
    sentences.flat_map { |sentence| Array(sentence.words) } if sentences
  end

  # Wraps each hash in an Entity; does nothing when +entities+ is nil.
  def create_entities(entities)
    @entities = entities.map { |entity| Entity.new(entity) } if entities
  end

  # Wraps each hash in a Topic; does nothing when +topics+ is nil.
  def create_topics(topics)
    @topics = topics.map { |topic| Topic.new(topic) } if topics
  end

  # Wraps each hash in a CoarseTopic; does nothing when +coarse_topics+ is nil.
  def create_coarse_topics(coarse_topics)
    @coarse_topics = coarse_topics.map { |coarse_topic| CoarseTopic.new(coarse_topic) } if coarse_topics
  end

  # Wraps each hash in a Sentence; does nothing when +sentences+ is nil.
  def create_sentences(sentences)
    @sentences = sentences.map { |sentence| Sentence.new(sentence) } if sentences
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
class TextRazor
  # Read-only view of one entity hash from a TextRazor response. Each reader
  # is filled from the camelCase key of the same name and is nil when that
  # key is absent.
  class Entity
    attr_reader :confidence_score, :ending_pos, :entity_english_id, :entity_id,
                :freebase_id, :freebase_types, :matching_text, :matching_tokens,
                :relevance_score, :starting_pos, :wiki_link

    # @param options [Hash] entity hash with String keys
    def initialize(options={})
      @confidence_score  = options['confidenceScore']
      @ending_pos        = options['endingPos']
      @entity_english_id = options['entityEnglishId']
      @entity_id         = options['entityId']
      @freebase_id       = options['freebaseId']
      @freebase_types    = options['freebaseTypes']
      # Entity payloads carry the surface text under 'matchedText';
      # 'matchingText' is still honoured for callers that relied on it.
      @matching_text     = options['matchedText'] || options['matchingText']
      @matching_tokens   = options['matchingTokens']
      @relevance_score   = options['relevanceScore']
      @starting_pos      = options['startingPos']
      @wiki_link         = options['wikiLink']
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
class TextRazor
  # Read-only view of one word hash from a TextRazor sentence. Each reader
  # is filled from the camelCase key of the same name and is nil when that
  # key is absent.
  class Word
    attr_reader :parent_position, :relation_to_parent, :position, :stem, :lemma,
                :token, :part_of_speech, :starting_pos, :ending_pos

    # @param options [Hash] word hash with String keys
    def initialize(options={})
      @parent_position    = options['parentPosition']
      @relation_to_parent = options['relationToParent']
      @position           = options['position']
      @stem               = options['stem']
      @lemma              = options['lemma']
      @token              = options['token']
      @part_of_speech     = options['partOfSpeech']
      # Offsets use camelCase keys like every other field; the snake_case
      # keys read previously are kept as a fallback for existing callers.
      @starting_pos       = options['startingPos'] || options['starting_pos']
      @ending_pos         = options['endingPos'] || options['ending_pos']
    end
  end
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'text_razor/sentence'
|
2
|
+
|
3
|
+
describe TextRazor::Sentence do
  # A sentence hash carrying two word hashes should expose two Word objects.
  it 'creates words when initializing a new sentence' do
    word_hashes = [
      {"position"=>0, "startingPos"=>0, "endingPos"=>3, "stem"=>"see", "lemma"=>"see", "token"=>"See", "partOfSpeech"=>"VB"},
      {"position"=>1, "startingPos"=>3, "endingPos"=>4, "stem"=>"?", "lemma"=>"?", "token"=>"?", "partOfSpeech"=>"." }
    ]
    payload = {"position" => 1, "words" => word_hashes}

    built_words = TextRazor::Sentence.new(payload).words

    built_words.first.class.should eql TextRazor::Word
    built_words.count.should eql 2
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'text_razor'
|
2
|
+
|
3
|
+
describe TextRazor do
  # Fresh client per example; none of the examples reach the network.
  before do
    @textrazor = TextRazor.new(api_key: 'abc123', text: 'This is a test sentence', extractors: 'entities')
  end

  it 'does not raise an error if the response is empty' do
    empty_body = {'response' => {}}.to_json
    RestClient.stub(:post).and_return(empty_body)

    expect { @textrazor.process }.to_not raise_error
  end

  it 'sets and creates entities' do
    entity_hashes = [
      {"id"=>0, "type"=>["WrittenWork"], "matchingTokens"=>[5], "entityId"=>"The New York Times", "confidenceScore"=>2.43203, "matchedText"=>"the new york times"},
      {"id"=>1, "type"=>["Person"], "matchingTokens"=>[15], "entityId"=>"Nick Bilton", "confidenceScore"=>0.5, "matchedText"=>"Nick Bilton"}
    ]

    @textrazor.create_entities(entity_hashes)

    built = @textrazor.entities
    built.first.class.should eql TextRazor::Entity
    built.count.should eql 2
  end

  it 'sets and creates topics' do
    topic_hashes = [
      {"id"=>0, "label"=>"Advertising", "wikiLink"=>"http://en.wikipedia.org/Advertising", "score"=>0.426667},
      {"id"=>1, "label"=>"Algorithm", "wikiLink"=>"http://en.wikipedia.org/Algorithm", "score"=>0.506667}
    ]

    @textrazor.create_topics(topic_hashes)

    built = @textrazor.topics
    built.first.class.should eql TextRazor::Topic
    built.count.should eql 2
  end

  it 'sets and creates coarse topics' do
    coarse_topic_hashes = [
      {"id"=>0, "label"=>"Advertising", "wikiLink"=>"http://en.wikipedia.org/Advertising", "score"=>0.426667},
      {"id"=>1, "label"=>"Algorithm", "wikiLink"=>"http://en.wikipedia.org/Algorithm", "score"=>0.506667}
    ]

    @textrazor.create_coarse_topics(coarse_topic_hashes)

    built = @textrazor.coarse_topics
    built.first.class.should eql TextRazor::CoarseTopic
    built.count.should eql 2
  end

  it 'sets and creates sentences' do
    sentence_hashes = [
      {"position"=>1, "words"=> [{},{}]},
      {"position"=>2, "words"=> [{},{}]}
    ]

    @textrazor.create_sentences(sentence_hashes)

    built = @textrazor.sentences
    built.first.class.should eql TextRazor::Sentence
    built.count.should eql 2
  end
end
|
data/text_razor.gemspec
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'text_razor/version'
|
5
|
+
|
6
|
+
# Package definition for the text_razor gem.
Gem::Specification.new do |gem|
  gem.name          = "text_razor"
  gem.version       = TextRazor::VERSION
  gem.authors       = ["Tomer Elmalem"]
  gem.email         = ["telmalem@gmail.com"]
  gem.description   = %q{TextRazor API gem}
  gem.summary       = %q{An API wrapper for TextRazor's Natural Language Processing API}
  gem.homepage      = ""
  # Declared so the published metadata matches the MIT text in LICENSE.txt.
  gem.licenses      = ["MIT"]

  # Package every file tracked by git; executables and test files are
  # picked out of that list by path prefix.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  gem.add_dependency "rest-client"
  gem.add_development_dependency "pry"
  gem.add_development_dependency "rspec", "~> 2.6"
end
|
metadata
ADDED
@@ -0,0 +1,110 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: text_razor
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Tomer Elmalem
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-04-04 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rest-client
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: pry
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
type: :development
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: rspec
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ~>
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '2.6'
|
54
|
+
type: :development
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ~>
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '2.6'
|
62
|
+
description: TextRazor API gem
|
63
|
+
email:
|
64
|
+
- telmalem@gmail.com
|
65
|
+
executables: []
|
66
|
+
extensions: []
|
67
|
+
extra_rdoc_files: []
|
68
|
+
files:
|
69
|
+
- .gitignore
|
70
|
+
- Gemfile
|
71
|
+
- LICENSE.txt
|
72
|
+
- README.md
|
73
|
+
- Rakefile
|
74
|
+
- lib/text_razor.rb
|
75
|
+
- lib/text_razor/coarse_topic.rb
|
76
|
+
- lib/text_razor/entity.rb
|
77
|
+
- lib/text_razor/sentence.rb
|
78
|
+
- lib/text_razor/topic.rb
|
79
|
+
- lib/text_razor/version.rb
|
80
|
+
- lib/text_razor/word.rb
|
81
|
+
- spec/sentences_spec.rb
|
82
|
+
- spec/text_razor_spec.rb
|
83
|
+
- text_razor.gemspec
|
84
|
+
homepage: ''
|
85
|
+
licenses: []
|
86
|
+
post_install_message:
|
87
|
+
rdoc_options: []
|
88
|
+
require_paths:
|
89
|
+
- lib
|
90
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
91
|
+
none: false
|
92
|
+
requirements:
|
93
|
+
- - ! '>='
|
94
|
+
- !ruby/object:Gem::Version
|
95
|
+
version: '0'
|
96
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
97
|
+
none: false
|
98
|
+
requirements:
|
99
|
+
- - ! '>='
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: '0'
|
102
|
+
requirements: []
|
103
|
+
rubyforge_project:
|
104
|
+
rubygems_version: 1.8.25
|
105
|
+
signing_key:
|
106
|
+
specification_version: 3
|
107
|
+
summary: An API wrapper for TextRazor's Natural Language Processing API
|
108
|
+
test_files:
|
109
|
+
- spec/sentences_spec.rb
|
110
|
+
- spec/text_razor_spec.rb
|