tf-idf-similarity 0.1.4 → 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c0ba1f941db96541f035a283df336907bf941439
4
- data.tar.gz: 22bbec24681023e880e1e4e3fa14d26356630021
3
+ metadata.gz: 736ca4c4b93d14ea046cbc4bdae930c8b88082be
4
+ data.tar.gz: 6b43e8356c59e0f48ac08f300186d4e12497368d
5
5
  SHA512:
6
- metadata.gz: 9e7cca8d705d8080dff857d2d953a6f0091e361bb0693f0ce650e64a2f4633ad5db386fa41ea8b73ae1cfe839db8e4e9f56592c98b36cdc6ab756699ecfaa5f7
7
- data.tar.gz: 3bcb9dcb07c9eb00c234920ff8d6340aac815c8181510d7ae65183e9b1d528001247439a86c6b603d973362bcee020eb0340558a9318693713cdaaa4b62a2ffd
6
+ metadata.gz: 635ea3047ba54a951020f95ab7e9412adf07a39d6042b85e605fcd0517345d506690bac11ab05f7f20e16f80106e95e8002fd1ae2ab4e466a27cc4f143ac15d6
7
+ data.tar.gz: 693ac6c70f9daf3f0a1ed06ba693d170654e7c871702641d598e59a6fc69cbd5316e76441da062aca05d0b8f67c0ba0c958a4115e2c9da17316a2ebef2190738
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --require spec_helper
@@ -1,21 +1,25 @@
1
+ sudo: false
1
2
  language: ruby
3
+ cache: bundler
2
4
  rvm:
3
- - 1.9.2
4
5
  - 1.9.3
5
6
  - 2.0.0
6
7
  - 2.1.0
8
+ - 2.2.0
7
9
  env:
8
10
  - MATRIX_LIBRARY=gsl
9
11
  - MATRIX_LIBRARY=narray
10
12
  - MATRIX_LIBRARY=nmatrix
11
13
  - MATRIX_LIBRARY=matrix
14
+ addons:
15
+ apt:
16
+ packages:
17
+ - gsl-bin
18
+ - libgsl0-dev
19
+ # Installing ATLAS will install BLAS.
20
+ - libatlas-dev
21
+ - libatlas-base-dev
22
+ - libatlas3gf-base
12
23
  before_install:
13
24
  - bundle config build.nmatrix --with-lapacklib
14
- - if [ $MATRIX_LIBRARY = 'nmatrix' -o $MATRIX_LIBRARY = 'gsl' ]; then sudo apt-get update -qq; fi
15
- - if [ $MATRIX_LIBRARY = 'gsl' ]; then sudo apt-get install gsl-bin libgsl0-dev; fi
16
- # Installing ATLAS will install BLAS.
17
- - if [ $MATRIX_LIBRARY = 'nmatrix' ]; then sudo apt-get install -qq libatlas-dev libatlas-base-dev libatlas3gf-base; fi
18
- - if [ $MATRIX_LIBRARY = 'nmatrix' ]; then export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/usr/include/atlas; fi
19
- # Travis sometimes runs without Bundler.
20
- install: bundle
21
- script: bundle exec rake --trace
25
+ - export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/usr/include/atlas
data/.yardopts CHANGED
@@ -1,4 +1,3 @@
1
- --no-private
2
1
  --hide-void-return
3
2
  --embed-mixin ClassMethods
4
3
  --markup=markdown
data/Gemfile CHANGED
@@ -1,8 +1,8 @@
1
- source "http://rubygems.org"
1
+ source 'https://rubygems.org'
2
2
 
3
- gem 'rb-gsl', '~> 1.16.0.2' if ENV['MATRIX_LIBRARY'] == 'gsl'
3
+ gem 'rb-gsl', '~> 1.16.0.2' if ENV['MATRIX_LIBRARY'] == 'gsl'
4
4
  gem 'narray', '~> 0.6.0.0' if ENV['MATRIX_LIBRARY'] == 'narray'
5
- gem 'nmatrix', '~> 0.1.0.rc5' if ENV['MATRIX_LIBRARY'] == 'nmatrix' && RUBY_VERSION >= '1.9'
5
+ gem 'nmatrix', '~> 0.1.0.rc5' if ENV['MATRIX_LIBRARY'] == 'nmatrix' && RUBY_VERSION >= '1.9'
6
6
 
7
7
  # Specify your gem's dependencies in the gemspec
8
8
  gemspec
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2012 Open North Inc.
1
+ Copyright (c) 2012 James McKinney
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person obtaining
4
4
  a copy of this software and associated documentation files (the
data/README.md CHANGED
@@ -1,12 +1,12 @@
1
1
  # Ruby Vector Space Model (VSM) with tf*idf weights
2
2
 
3
- [![Gem Version](https://badge.fury.io/rb/tf-idf-similarity.svg)](http://badge.fury.io/rb/tf-idf-similarity)
4
- [![Build Status](https://secure.travis-ci.org/opennorth/tf-idf-similarity.png)](http://travis-ci.org/opennorth/tf-idf-similarity)
5
- [![Dependency Status](https://gemnasium.com/opennorth/tf-idf-similarity.png)](https://gemnasium.com/opennorth/tf-idf-similarity)
6
- [![Coverage Status](https://coveralls.io/repos/opennorth/tf-idf-similarity/badge.png?branch=master)](https://coveralls.io/r/opennorth/tf-idf-similarity)
7
- [![Code Climate](https://codeclimate.com/github/opennorth/tf-idf-similarity.png)](https://codeclimate.com/github/opennorth/tf-idf-similarity)
3
+ [![Gem Version](https://badge.fury.io/rb/tf-idf-similarity.svg)](https://badge.fury.io/rb/tf-idf-similarity)
4
+ [![Build Status](https://secure.travis-ci.org/jpmckinney/tf-idf-similarity.png)](https://travis-ci.org/jpmckinney/tf-idf-similarity)
5
+ [![Dependency Status](https://gemnasium.com/jpmckinney/tf-idf-similarity.png)](https://gemnasium.com/jpmckinney/tf-idf-similarity)
6
+ [![Coverage Status](https://coveralls.io/repos/jpmckinney/tf-idf-similarity/badge.png)](https://coveralls.io/r/jpmckinney/tf-idf-similarity)
7
+ [![Code Climate](https://codeclimate.com/github/jpmckinney/tf-idf-similarity.png)](https://codeclimate.com/github/jpmckinney/tf-idf-similarity)
8
8
 
9
- Calculates the similarity between texts using a [bag-of-words](http://en.wikipedia.org/wiki/Bag_of_words_model) [Vector Space Model](http://en.wikipedia.org/wiki/Vector_space_model) with [Term Frequency-Inverse Document Frequency (tf*idf)](http://en.wikipedia.org/wiki/Tf*idf) weights. If your use case demands performance, use [Lucene](http://lucene.apache.org/core/) (see below).
9
+ Calculates the similarity between texts using a [bag-of-words](https://en.wikipedia.org/wiki/Bag_of_words_model) [Vector Space Model](https://en.wikipedia.org/wiki/Vector_space_model) with [Term Frequency-Inverse Document Frequency (tf*idf)](https://en.wikipedia.org/wiki/Tfidf) weights. If your use case demands performance, use [Lucene](http://lucene.apache.org/core/) (see below).
10
10
 
11
11
  ## Usage
12
12
 
@@ -24,13 +24,13 @@ document3 = TfIdfSimilarity::Document.new("Nam scelerisque dui sed leo...")
24
24
  corpus = [document1, document2, document3]
25
25
  ```
26
26
 
27
- Create a document-term matrix using [Term Frequency-Inverse Document Frequency function](http://en.wikipedia.org/wiki/):
27
+ Create a document-term matrix using [Term Frequency-Inverse Document Frequency function](https://en.wikipedia.org/wiki/Tf–idf):
28
28
 
29
29
  ```ruby
30
30
  model = TfIdfSimilarity::TfIdfModel.new(corpus)
31
31
  ```
32
32
 
33
- Or, create a document-term matrix using the [Okapi BM25 ranking function](http://en.wikipedia.org/wiki/Okapi_BM25):
33
+ Or, create a document-term matrix using the [Okapi BM25 ranking function](https://en.wikipedia.org/wiki/Okapi_BM25):
34
34
 
35
35
  ```ruby
36
36
  model = TfIdfSimilarity::BM25Model.new(corpus)
@@ -58,16 +58,46 @@ end
58
58
  puts tfidf_by_term.sort_by{|_,tfidf| -tfidf}
59
59
  ```
60
60
 
61
+ Tokenize a document yourself, for example by excluding stop words:
62
+
63
+ ```ruby
64
+ require 'unicode_utils'
65
+ text = "Lorem ipsum dolor sit amet..."
66
+ tokens = UnicodeUtils.each_word(text).to_a - ['and', 'the', 'to']
67
+ document1 = TfIdfSimilarity::Document.new(text, :tokens => tokens)
68
+ ```
69
+
70
+ Provide, by yourself, the number of times each term appears and the number of tokens in the document:
71
+
72
+ ```ruby
73
+ require 'unicode_utils'
74
+ text = "Lorem ipsum dolor sit amet..."
75
+ tokens = UnicodeUtils.each_word(text).to_a - ['and', 'the', 'to']
76
+ term_counts = Hash.new(0)
77
+ size = 0
78
+ tokens.each do |token|
79
+ # Unless the token is numeric.
80
+ unless token[/\A\d+\z/]
81
+ # Remove all punctuation from tokens.
82
+ term_counts[token.gsub(/\p{Punct}/, '')] += 1
83
+ size += 1
84
+ end
85
+ end
86
+ document1 = TfIdfSimilarity::Document.new(text, :term_counts => term_counts, :size => size)
87
+ ```
88
+
61
89
  [Read the documentation at RubyDoc.info.](http://rubydoc.info/gems/tf-idf-similarity)
62
90
 
63
91
  ## Speed
64
92
 
65
93
  Instead of using the Ruby Standard Library's [Matrix](http://www.ruby-doc.org/stdlib-2.0/libdoc/matrix/rdoc/Matrix.html) class, you can use one of the [GNU Scientific Library (GSL)](http://www.gnu.org/software/gsl/), [NArray](http://narray.rubyforge.org/) or [NMatrix](https://github.com/SciRuby/nmatrix) (0.0.9 or greater) gems for faster matrix operations. For example:
66
94
 
67
- require 'gsl'
68
- model = TfIdfSimilarity::TfIdfModel.new(corpus, :library => :gsl)
95
+ require 'narray'
96
+ model = TfIdfSimilarity::TfIdfModel.new(corpus, :library => :narray)
69
97
 
70
- The NMatrix gem gives access to [Automatically Tuned Linear Algebra Software (ATLAS)](http://math-atlas.sourceforge.net/), which you may know of through [Linear Algebra PACKage (LAPACK)](http://www.netlib.org/lapack/) or [Basic Linear Algebra Subprograms (BLAS)](http://www.netlib.org/blas/). Follow [these instructions](https://github.com/SciRuby/nmatrix#synopsis) to install the NMatrix gem. You may need [additional instructions for Mac OS X Lion](https://github.com/SciRuby/nmatrix/wiki/Installation).
98
+ NArray seems to have the best performance of the three libraries.
99
+
100
+ The NMatrix gem gives access to [Automatically Tuned Linear Algebra Software (ATLAS)](http://math-atlas.sourceforge.net/), which you may know of through [Linear Algebra PACKage (LAPACK)](http://www.netlib.org/lapack/) or [Basic Linear Algebra Subprograms (BLAS)](http://www.netlib.org/blas/). Follow [these instructions](https://github.com/SciRuby/nmatrix#installation) to install the NMatrix gem.
71
101
 
72
102
  ## Extras
73
103
 
@@ -76,7 +106,7 @@ You can access more term frequency, document frequency, and normalization formul
76
106
  require 'tf-idf-similarity/extras/document'
77
107
  require 'tf-idf-similarity/extras/tf_idf_model'
78
108
 
79
- The default tf*idf formula follows the [Lucene Conceptual Scoring Formula](http://lucene.apache.org/core/4_0_0-BETA/core/org/apache/lucene/search/similarities/TFIDFSimilarity.html).
109
+ The default tf*idf formula follows the [Lucene Conceptual Scoring Formula](http://lucene.apache.org/core/4_0_0/core/org/apache/lucene/search/similarities/TFIDFSimilarity.html).
80
110
 
81
111
  ## Why?
82
112
 
@@ -115,17 +145,13 @@ Adapters for the following projects were also considered:
115
145
 
116
146
  ## Further Reading
117
147
 
118
- Lucene implements many more [similarity functions](http://lucene.apache.org/core/4_0_0-BETA/core/org/apache/lucene/search/similarities/Similarity.html), such as:
119
-
120
- * a [divergence from randomness (DFR) framework](http://lucene.apache.org/core/4_0_0-BETA/core/org/apache/lucene/search/similarities/DFRSimilarity.html)
121
- * a [framework for the family of information-based models](http://lucene.apache.org/core/4_0_0-BETA/core/org/apache/lucene/search/similarities/IBSimilarity.html)
122
- * a [language model with Bayesian smoothing using Dirichlet priors](http://lucene.apache.org/core/4_0_0-BETA/core/org/apache/lucene/search/similarities/LMDirichletSimilarity.html)
123
- * a [language model with Jelinek-Mercer smoothing](http://lucene.apache.org/core/4_0_0-BETA/core/org/apache/lucene/search/similarities/LMJelinekMercerSimilarity.html)
124
-
125
- Lucene can even [combine similarity measures](http://lucene.apache.org/core/4_0_0-BETA/core/org/apache/lucene/search/similarities/MultiSimilarity.html).
148
+ Lucene implements many more [similarity functions](http://lucene.apache.org/core/4_0_0/core/org/apache/lucene/search/similarities/Similarity.html), such as:
126
149
 
127
- ## Bugs? Questions?
150
+ * a [divergence from randomness (DFR) framework](http://lucene.apache.org/core/4_0_0/core/org/apache/lucene/search/similarities/DFRSimilarity.html)
151
+ * a [framework for the family of information-based models](http://lucene.apache.org/core/4_0_0/core/org/apache/lucene/search/similarities/IBSimilarity.html)
152
+ * a [language model with Bayesian smoothing using Dirichlet priors](http://lucene.apache.org/core/4_0_0/core/org/apache/lucene/search/similarities/LMDirichletSimilarity.html)
153
+ * a [language model with Jelinek-Mercer smoothing](http://lucene.apache.org/core/4_0_0/core/org/apache/lucene/search/similarities/LMJelinekMercerSimilarity.html)
128
154
 
129
- This gem's main repository is on GitHub: [http://github.com/opennorth/tf-idf-similarity](http://github.com/opennorth/tf-idf-similarity), where your contributions, forks, bug reports, feature requests, and feedback are greatly welcomed.
155
+ Lucene can even [combine similarity measures](http://lucene.apache.org/core/4_0_0/core/org/apache/lucene/search/similarities/MultiSimilarity.html).
130
156
 
131
- Copyright (c) 2012 Open North Inc., released under the MIT license
157
+ Copyright (c) 2012 James McKinney, released under the MIT license
@@ -1,12 +1,8 @@
1
1
  require 'forwardable'
2
2
  require 'set'
3
3
 
4
- begin
5
- require 'unicode_utils/downcase'
6
- require 'unicode_utils/each_word'
7
- rescue LoadError
8
- # Ruby 1.8
9
- end
4
+ require 'unicode_utils/downcase'
5
+ require 'unicode_utils/each_word'
10
6
 
11
7
  module TfIdfSimilarity
12
8
  end
@@ -1,7 +1,7 @@
1
1
  # A document-term matrix using the BM25 function.
2
2
  #
3
3
  # @see http://lucene.apache.org/core/4_0_0-BETA/core/org/apache/lucene/search/similarities/BM25Similarity.html
4
- # @see http://en.wikipedia.org/wiki/Okapi_BM25
4
+ # @see https://en.wikipedia.org/wiki/Okapi_BM25
5
5
  module TfIdfSimilarity
6
6
  class BM25Model < Model
7
7
  # Return the term's inverse document frequency.
@@ -76,7 +76,7 @@ module TfIdfSimilarity
76
76
  # @see http://unicode.org/reports/tr29/#Default_Word_Boundaries
77
77
  # @see http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters#solr.StandardTokenizerFactory
78
78
  def tokenize(text)
79
- @tokens || defined?(UnicodeUtils) && UnicodeUtils.each_word(text) || text.split(/\b/) # @todo Ruby 1.8 has no good word boundary code
79
+ @tokens || UnicodeUtils.each_word(text)
80
80
  end
81
81
  end
82
82
  end
@@ -110,7 +110,7 @@ module TfIdfSimilarity
110
110
  end
111
111
  alias_method :binary_tf, :binary_term_frequency
112
112
 
113
- # @see http://en.wikipedia.org/wiki/Tf*idf
113
+ # @see https://en.wikipedia.org/wiki/Tf*idf
114
114
  # @see http://nlp.stanford.edu/IR-book/html/htmledition/maximum-tf-normalization-1.html
115
115
  def normalized_term_frequency(document, term, a = 0)
116
116
  a + (1 - a) * document.term_count(term) / document.maximum_term_count
@@ -34,7 +34,7 @@ module TfIdfSimilarity
34
34
  elsif column.respond_to?(:normalize)
35
35
  column.normalize
36
36
  else
37
- column * (1 / Math.sqrt(column.inner_product(column))) # 1.8 does define division
37
+ column / Math.sqrt(column.inner_product(column))
38
38
  end
39
39
  end)
40
40
  end
@@ -9,7 +9,7 @@
9
9
  # @see http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters#solr.WordDelimiterFilterFactory
10
10
  # @see http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters#solr.SynonymFilterFactory
11
11
  module TfIdfSimilarity
12
- class Token < String
12
+ class Token < SimpleDelegator
13
13
  # Returns a falsy value if all its characters are numbers, punctuation,
14
14
  # whitespace or control characters.
15
15
  #
@@ -35,10 +35,7 @@ module TfIdfSimilarity
35
35
  #
36
36
  # @see http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters#solr.LowerCaseFilterFactory
37
37
  def lowercase_filter
38
- self.class.new(defined?(UnicodeUtils) ? UnicodeUtils.downcase(self) : tr(
39
- "ÀÁÂÃÄÅĀĂĄÇĆĈĊČÐĎĐÈÉÊËĒĔĖĘĚĜĞĠĢĤĦÌÍÎÏĨĪĬĮĴĶĹĻĽĿŁÑŃŅŇŊÒÓÔÕÖØŌŎŐŔŖŘŚŜŞŠŢŤŦÙÚÛÜŨŪŬŮŰŲŴÝŶŸŹŻŽ",
40
- "àáâãäåāăąçćĉċčðďđèéêëēĕėęěĝğġģĥħìíîïĩīĭįĵķĺļľŀłñńņňŋòóôõöøōŏőŕŗřśŝşšţťŧùúûüũūŭůűųŵýŷÿźżž"
41
- ).downcase)
38
+ self.class.new(UnicodeUtils.downcase(self))
42
39
  end
43
40
 
44
41
  # Returns a string with no English possessive or periods in acronyms.
@@ -47,7 +44,7 @@ module TfIdfSimilarity
47
44
  #
48
45
  # @see http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters#solr.ClassicFilterFactory
49
46
  def classic_filter
50
- self.class.new(self.gsub('.', '').chomp("'s"))
47
+ self.class.new(self.gsub('.', '').sub(/['`’]s\z/, ''))
51
48
  end
52
49
  end
53
50
  end
@@ -1,3 +1,3 @@
1
1
  module TfIdfSimilarity
2
- VERSION = "0.1.4"
2
+ VERSION = "0.1.5"
3
3
  end
@@ -33,7 +33,7 @@ module TfIdfSimilarity
33
33
  build_model(documents)
34
34
  end
35
35
 
36
- pending "Add #search"
36
+ skip "Add #search"
37
37
  end
38
38
 
39
39
  # @see https://github.com/bbcrd/Similarity/blob/master/test/test_corpus.rb
@@ -98,7 +98,7 @@ module TfIdfSimilarity
98
98
  end
99
99
 
100
100
  it 'should return the similarity matrix' do
101
- pending "Calculate the tf*idf matrix like the similarity gem does"
101
+ skip "Calculate the tf*idf matrix like the similarity gem does"
102
102
  end
103
103
 
104
104
  it 'should return the number of documents in which a term appears' do
@@ -113,7 +113,7 @@ module TfIdfSimilarity
113
113
  end
114
114
 
115
115
  it 'should return the document vector' do
116
- pending "Calculate the tf*idf matrix like the similarity gem does"
116
+ skip "Calculate the tf*idf matrix like the similarity gem does"
117
117
  end
118
118
  end
119
119
 
@@ -1,7 +1,11 @@
1
1
  require 'rubygems'
2
2
 
3
+ require 'simplecov'
3
4
  require 'coveralls'
4
- Coveralls.wear!
5
+ SimpleCov.formatter = Coveralls::SimpleCov::Formatter
6
+ SimpleCov.start do
7
+ add_filter 'spec'
8
+ end
5
9
 
6
10
  require 'rspec'
7
11
  require File.dirname(__FILE__) + '/../lib/tf-idf-similarity'
@@ -28,6 +28,14 @@ module TfIdfSimilarity
28
28
  Token.new("foo's").classic_filter.should == 'foo'
29
29
  end
30
30
 
31
+ it 'should remove ending possessives with nonstandard apostrophe 1' do
32
+ Token.new("foo`s").classic_filter.should == 'foo'
33
+ end
34
+
35
+ it 'should remove ending possessives with nonstandard apostrophe 2' do
36
+ Token.new("foo’s").classic_filter.should == 'foo'
37
+ end
38
+
31
39
  it 'should not remove infix possessives' do
32
40
  Token.new("foo's bar").classic_filter.should == "foo's bar"
33
41
  end
@@ -5,9 +5,8 @@ Gem::Specification.new do |s|
5
5
  s.name = "tf-idf-similarity"
6
6
  s.version = TfIdfSimilarity::VERSION
7
7
  s.platform = Gem::Platform::RUBY
8
- s.authors = ["Open North"]
9
- s.email = ["info@opennorth.ca"]
10
- s.homepage = "http://github.com/opennorth/tf-idf-similarity"
8
+ s.authors = ["James McKinney"]
9
+ s.homepage = "https://github.com/jpmckinney/tf-idf-similarity"
11
10
  s.summary = %q{Calculates the similarity between texts using tf*idf}
12
11
  s.license = 'MIT'
13
12
 
@@ -16,10 +15,9 @@ Gem::Specification.new do |s|
16
15
  s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
17
16
  s.require_paths = ["lib"]
18
17
 
19
- s.add_development_dependency('rspec', '~> 2.10')
20
- s.add_development_dependency('rake')
21
- s.add_development_dependency('coveralls')
22
- s.add_development_dependency('mime-types', '~> 1.25') # 2.0 requires Ruby 1.9.2
18
+ s.add_runtime_dependency('unicode_utils', '~> 1.4')
23
19
 
24
- s.extensions = ['ext/mkrf_conf.rb']
20
+ s.add_development_dependency('coveralls')
21
+ s.add_development_dependency('rake')
22
+ s.add_development_dependency('rspec', '~> 2.10')
25
23
  end
metadata CHANGED
@@ -1,31 +1,31 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tf-idf-similarity
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
- - Open North
7
+ - James McKinney
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-10-11 00:00:00.000000000 Z
11
+ date: 2016-01-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: rspec
14
+ name: unicode_utils
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '2.10'
20
- type: :development
19
+ version: '1.4'
20
+ type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '2.10'
26
+ version: '1.4'
27
27
  - !ruby/object:Gem::Dependency
28
- name: rake
28
+ name: coveralls
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - ">="
@@ -39,7 +39,7 @@ dependencies:
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
41
  - !ruby/object:Gem::Dependency
42
- name: coveralls
42
+ name: rake
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - ">="
@@ -53,36 +53,33 @@ dependencies:
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
55
  - !ruby/object:Gem::Dependency
56
- name: mime-types
56
+ name: rspec
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
59
  - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: '1.25'
61
+ version: '2.10'
62
62
  type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
- version: '1.25'
68
+ version: '2.10'
69
69
  description:
70
- email:
71
- - info@opennorth.ca
70
+ email:
72
71
  executables: []
73
- extensions:
74
- - ext/mkrf_conf.rb
72
+ extensions: []
75
73
  extra_rdoc_files: []
76
74
  files:
77
75
  - ".gitignore"
76
+ - ".rspec"
78
77
  - ".travis.yml"
79
78
  - ".yardopts"
80
79
  - Gemfile
81
80
  - LICENSE
82
81
  - README.md
83
82
  - Rakefile
84
- - USAGE
85
- - ext/mkrf_conf.rb
86
83
  - lib/tf-idf-similarity.rb
87
84
  - lib/tf-idf-similarity/bm25_model.rb
88
85
  - lib/tf-idf-similarity/document.rb
@@ -102,7 +99,7 @@ files:
102
99
  - spec/tf_idf_model_spec.rb
103
100
  - spec/token_spec.rb
104
101
  - td-idf-similarity.gemspec
105
- homepage: http://github.com/opennorth/tf-idf-similarity
102
+ homepage: https://github.com/jpmckinney/tf-idf-similarity
106
103
  licenses:
107
104
  - MIT
108
105
  metadata: {}
@@ -122,7 +119,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
122
119
  version: '0'
123
120
  requirements: []
124
121
  rubyforge_project:
125
- rubygems_version: 2.2.2
122
+ rubygems_version: 2.4.5
126
123
  signing_key:
127
124
  specification_version: 4
128
125
  summary: Calculates the similarity between texts using tf*idf
data/USAGE DELETED
@@ -1 +0,0 @@
1
- See README.md for full usage details.
@@ -1,15 +0,0 @@
1
- # @see http://www.programmersparadox.com/2012/05/21/gemspec-loading-dependent-gems-based-on-the-users-system/
2
- require 'rubygems/dependency_installer.rb'
3
-
4
- installer = Gem::DependencyInstaller.new
5
- begin
6
- unless RUBY_VERSION < '1.9'
7
- installer.install('unicode_utils', '>=0')
8
- end
9
- rescue
10
- exit(1)
11
- end
12
-
13
- f = File.open(File.join(File.dirname(__FILE__), "Rakefile"), "w")
14
- f.write("task :default\n")
15
- f.close