tf-idf-similarity 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c0ba1f941db96541f035a283df336907bf941439
4
- data.tar.gz: 22bbec24681023e880e1e4e3fa14d26356630021
3
+ metadata.gz: 736ca4c4b93d14ea046cbc4bdae930c8b88082be
4
+ data.tar.gz: 6b43e8356c59e0f48ac08f300186d4e12497368d
5
5
  SHA512:
6
- metadata.gz: 9e7cca8d705d8080dff857d2d953a6f0091e361bb0693f0ce650e64a2f4633ad5db386fa41ea8b73ae1cfe839db8e4e9f56592c98b36cdc6ab756699ecfaa5f7
7
- data.tar.gz: 3bcb9dcb07c9eb00c234920ff8d6340aac815c8181510d7ae65183e9b1d528001247439a86c6b603d973362bcee020eb0340558a9318693713cdaaa4b62a2ffd
6
+ metadata.gz: 635ea3047ba54a951020f95ab7e9412adf07a39d6042b85e605fcd0517345d506690bac11ab05f7f20e16f80106e95e8002fd1ae2ab4e466a27cc4f143ac15d6
7
+ data.tar.gz: 693ac6c70f9daf3f0a1ed06ba693d170654e7c871702641d598e59a6fc69cbd5316e76441da062aca05d0b8f67c0ba0c958a4115e2c9da17316a2ebef2190738
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --require spec_helper
@@ -1,21 +1,25 @@
1
+ sudo: false
1
2
  language: ruby
3
+ cache: bundler
2
4
  rvm:
3
- - 1.9.2
4
5
  - 1.9.3
5
6
  - 2.0.0
6
7
  - 2.1.0
8
+ - 2.2.0
7
9
  env:
8
10
  - MATRIX_LIBRARY=gsl
9
11
  - MATRIX_LIBRARY=narray
10
12
  - MATRIX_LIBRARY=nmatrix
11
13
  - MATRIX_LIBRARY=matrix
14
+ addons:
15
+ apt:
16
+ packages:
17
+ - gsl-bin
18
+ - libgsl0-dev
19
+ # Installing ATLAS will install BLAS.
20
+ - libatlas-dev
21
+ - libatlas-base-dev
22
+ - libatlas3gf-base
12
23
  before_install:
13
24
  - bundle config build.nmatrix --with-lapacklib
14
- - if [ $MATRIX_LIBRARY = 'nmatrix' -o $MATRIX_LIBRARY = 'gsl' ]; then sudo apt-get update -qq; fi
15
- - if [ $MATRIX_LIBRARY = 'gsl' ]; then sudo apt-get install gsl-bin libgsl0-dev; fi
16
- # Installing ATLAS will install BLAS.
17
- - if [ $MATRIX_LIBRARY = 'nmatrix' ]; then sudo apt-get install -qq libatlas-dev libatlas-base-dev libatlas3gf-base; fi
18
- - if [ $MATRIX_LIBRARY = 'nmatrix' ]; then export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/usr/include/atlas; fi
19
- # Travis sometimes runs without Bundler.
20
- install: bundle
21
- script: bundle exec rake --trace
25
+ - export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/usr/include/atlas
data/.yardopts CHANGED
@@ -1,4 +1,3 @@
1
- --no-private
2
1
  --hide-void-return
3
2
  --embed-mixin ClassMethods
4
3
  --markup=markdown
data/Gemfile CHANGED
@@ -1,8 +1,8 @@
1
- source "http://rubygems.org"
1
+ source 'https://rubygems.org'
2
2
 
3
- gem 'rb-gsl', '~> 1.16.0.2' if ENV['MATRIX_LIBRARY'] == 'gsl'
3
+ gem 'rb-gsl', '~> 1.16.0.2' if ENV['MATRIX_LIBRARY'] == 'gsl'
4
4
  gem 'narray', '~> 0.6.0.0' if ENV['MATRIX_LIBRARY'] == 'narray'
5
- gem 'nmatrix', '~> 0.1.0.rc5' if ENV['MATRIX_LIBRARY'] == 'nmatrix' && RUBY_VERSION >= '1.9'
5
+ gem 'nmatrix', '~> 0.1.0.rc5' if ENV['MATRIX_LIBRARY'] == 'nmatrix' && RUBY_VERSION >= '1.9'
6
6
 
7
7
  # Specify your gem's dependencies in the gemspec
8
8
  gemspec
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2012 Open North Inc.
1
+ Copyright (c) 2012 James McKinney
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person obtaining
4
4
  a copy of this software and associated documentation files (the
data/README.md CHANGED
@@ -1,12 +1,12 @@
1
1
  # Ruby Vector Space Model (VSM) with tf*idf weights
2
2
 
3
- [![Gem Version](https://badge.fury.io/rb/tf-idf-similarity.svg)](http://badge.fury.io/rb/tf-idf-similarity)
4
- [![Build Status](https://secure.travis-ci.org/opennorth/tf-idf-similarity.png)](http://travis-ci.org/opennorth/tf-idf-similarity)
5
- [![Dependency Status](https://gemnasium.com/opennorth/tf-idf-similarity.png)](https://gemnasium.com/opennorth/tf-idf-similarity)
6
- [![Coverage Status](https://coveralls.io/repos/opennorth/tf-idf-similarity/badge.png?branch=master)](https://coveralls.io/r/opennorth/tf-idf-similarity)
7
- [![Code Climate](https://codeclimate.com/github/opennorth/tf-idf-similarity.png)](https://codeclimate.com/github/opennorth/tf-idf-similarity)
3
+ [![Gem Version](https://badge.fury.io/rb/tf-idf-similarity.svg)](https://badge.fury.io/rb/tf-idf-similarity)
4
+ [![Build Status](https://secure.travis-ci.org/jpmckinney/tf-idf-similarity.png)](https://travis-ci.org/jpmckinney/tf-idf-similarity)
5
+ [![Dependency Status](https://gemnasium.com/jpmckinney/tf-idf-similarity.png)](https://gemnasium.com/jpmckinney/tf-idf-similarity)
6
+ [![Coverage Status](https://coveralls.io/repos/jpmckinney/tf-idf-similarity/badge.png)](https://coveralls.io/r/jpmckinney/tf-idf-similarity)
7
+ [![Code Climate](https://codeclimate.com/github/jpmckinney/tf-idf-similarity.png)](https://codeclimate.com/github/jpmckinney/tf-idf-similarity)
8
8
 
9
- Calculates the similarity between texts using a [bag-of-words](http://en.wikipedia.org/wiki/Bag_of_words_model) [Vector Space Model](http://en.wikipedia.org/wiki/Vector_space_model) with [Term Frequency-Inverse Document Frequency (tf*idf)](http://en.wikipedia.org/wiki/Tf*idf) weights. If your use case demands performance, use [Lucene](http://lucene.apache.org/core/) (see below).
9
+ Calculates the similarity between texts using a [bag-of-words](https://en.wikipedia.org/wiki/Bag_of_words_model) [Vector Space Model](https://en.wikipedia.org/wiki/Vector_space_model) with [Term Frequency-Inverse Document Frequency (tf*idf)](https://en.wikipedia.org/wiki/Tfidf) weights. If your use case demands performance, use [Lucene](http://lucene.apache.org/core/) (see below).
10
10
 
11
11
  ## Usage
12
12
 
@@ -24,13 +24,13 @@ document3 = TfIdfSimilarity::Document.new("Nam scelerisque dui sed leo...")
24
24
  corpus = [document1, document2, document3]
25
25
  ```
26
26
 
27
- Create a document-term matrix using [Term Frequency-Inverse Document Frequency function](http://en.wikipedia.org/wiki/):
27
+ Create a document-term matrix using [Term Frequency-Inverse Document Frequency function](https://en.wikipedia.org/wiki/Tf–idf):
28
28
 
29
29
  ```ruby
30
30
  model = TfIdfSimilarity::TfIdfModel.new(corpus)
31
31
  ```
32
32
 
33
- Or, create a document-term matrix using the [Okapi BM25 ranking function](http://en.wikipedia.org/wiki/Okapi_BM25):
33
+ Or, create a document-term matrix using the [Okapi BM25 ranking function](https://en.wikipedia.org/wiki/Okapi_BM25):
34
34
 
35
35
  ```ruby
36
36
  model = TfIdfSimilarity::BM25Model.new(corpus)
@@ -58,16 +58,46 @@ end
58
58
  puts tfidf_by_term.sort_by{|_,tfidf| -tfidf}
59
59
  ```
60
60
 
61
+ Tokenize a document yourself, for example by excluding stop words:
62
+
63
+ ```ruby
64
+ require 'unicode_utils'
65
+ text = "Lorem ipsum dolor sit amet..."
66
+ tokens = UnicodeUtils.each_word(text).to_a - ['and', 'the', 'to']
67
+ document1 = TfIdfSimilarity::Document.new(text, :tokens => tokens)
68
+ ```
69
+
70
+ Or supply the number of times each term appears and the number of tokens in the document yourself:
71
+
72
+ ```ruby
73
+ require 'unicode_utils'
74
+ text = "Lorem ipsum dolor sit amet..."
75
+ tokens = UnicodeUtils.each_word(text).to_a - ['and', 'the', 'to']
76
+ term_counts = Hash.new(0)
77
+ size = 0
78
+ tokens.each do |token|
79
+ # Unless the token is numeric.
80
+ unless token[/\A\d+\z/]
81
+ # Remove all punctuation from tokens.
82
+ term_counts[token.gsub(/\p{Punct}/, '')] += 1
83
+ size += 1
84
+ end
85
+ end
86
+ document1 = TfIdfSimilarity::Document.new(text, :term_counts => term_counts, :size => size)
87
+ ```
88
+
61
89
  [Read the documentation at RubyDoc.info.](http://rubydoc.info/gems/tf-idf-similarity)
62
90
 
63
91
  ## Speed
64
92
 
65
93
  Instead of using the Ruby Standard Library's [Matrix](http://www.ruby-doc.org/stdlib-2.0/libdoc/matrix/rdoc/Matrix.html) class, you can use one of the [GNU Scientific Library (GSL)](http://www.gnu.org/software/gsl/), [NArray](http://narray.rubyforge.org/) or [NMatrix](https://github.com/SciRuby/nmatrix) (0.0.9 or greater) gems for faster matrix operations. For example:
66
94
 
67
- require 'gsl'
68
- model = TfIdfSimilarity::TfIdfModel.new(corpus, :library => :gsl)
95
+ require 'narray'
96
+ model = TfIdfSimilarity::TfIdfModel.new(corpus, :library => :narray)
69
97
 
70
- The NMatrix gem gives access to [Automatically Tuned Linear Algebra Software (ATLAS)](http://math-atlas.sourceforge.net/), which you may know of through [Linear Algebra PACKage (LAPACK)](http://www.netlib.org/lapack/) or [Basic Linear Algebra Subprograms (BLAS)](http://www.netlib.org/blas/). Follow [these instructions](https://github.com/SciRuby/nmatrix#synopsis) to install the NMatrix gem. You may need [additional instructions for Mac OS X Lion](https://github.com/SciRuby/nmatrix/wiki/Installation).
98
+ NArray seems to have the best performance of the three libraries.
99
+
100
+ The NMatrix gem gives access to [Automatically Tuned Linear Algebra Software (ATLAS)](http://math-atlas.sourceforge.net/), which you may know of through [Linear Algebra PACKage (LAPACK)](http://www.netlib.org/lapack/) or [Basic Linear Algebra Subprograms (BLAS)](http://www.netlib.org/blas/). Follow [these instructions](https://github.com/SciRuby/nmatrix#installation) to install the NMatrix gem.
71
101
 
72
102
  ## Extras
73
103
 
@@ -76,7 +106,7 @@ You can access more term frequency, document frequency, and normalization formul
76
106
  require 'tf-idf-similarity/extras/document'
77
107
  require 'tf-idf-similarity/extras/tf_idf_model'
78
108
 
79
- The default tf*idf formula follows the [Lucene Conceptual Scoring Formula](http://lucene.apache.org/core/4_0_0-BETA/core/org/apache/lucene/search/similarities/TFIDFSimilarity.html).
109
+ The default tf*idf formula follows the [Lucene Conceptual Scoring Formula](http://lucene.apache.org/core/4_0_0/core/org/apache/lucene/search/similarities/TFIDFSimilarity.html).
80
110
 
81
111
  ## Why?
82
112
 
@@ -115,17 +145,13 @@ Adapters for the following projects were also considered:
115
145
 
116
146
  ## Further Reading
117
147
 
118
- Lucene implements many more [similarity functions](http://lucene.apache.org/core/4_0_0-BETA/core/org/apache/lucene/search/similarities/Similarity.html), such as:
119
-
120
- * a [divergence from randomness (DFR) framework](http://lucene.apache.org/core/4_0_0-BETA/core/org/apache/lucene/search/similarities/DFRSimilarity.html)
121
- * a [framework for the family of information-based models](http://lucene.apache.org/core/4_0_0-BETA/core/org/apache/lucene/search/similarities/IBSimilarity.html)
122
- * a [language model with Bayesian smoothing using Dirichlet priors](http://lucene.apache.org/core/4_0_0-BETA/core/org/apache/lucene/search/similarities/LMDirichletSimilarity.html)
123
- * a [language model with Jelinek-Mercer smoothing](http://lucene.apache.org/core/4_0_0-BETA/core/org/apache/lucene/search/similarities/LMJelinekMercerSimilarity.html)
124
-
125
- Lucene can even [combine similarity measures](http://lucene.apache.org/core/4_0_0-BETA/core/org/apache/lucene/search/similarities/MultiSimilarity.html).
148
+ Lucene implements many more [similarity functions](http://lucene.apache.org/core/4_0_0/core/org/apache/lucene/search/similarities/Similarity.html), such as:
126
149
 
127
- ## Bugs? Questions?
150
+ * a [divergence from randomness (DFR) framework](http://lucene.apache.org/core/4_0_0/core/org/apache/lucene/search/similarities/DFRSimilarity.html)
151
+ * a [framework for the family of information-based models](http://lucene.apache.org/core/4_0_0/core/org/apache/lucene/search/similarities/IBSimilarity.html)
152
+ * a [language model with Bayesian smoothing using Dirichlet priors](http://lucene.apache.org/core/4_0_0/core/org/apache/lucene/search/similarities/LMDirichletSimilarity.html)
153
+ * a [language model with Jelinek-Mercer smoothing](http://lucene.apache.org/core/4_0_0/core/org/apache/lucene/search/similarities/LMJelinekMercerSimilarity.html)
128
154
 
129
- This gem's main repository is on GitHub: [http://github.com/opennorth/tf-idf-similarity](http://github.com/opennorth/tf-idf-similarity), where your contributions, forks, bug reports, feature requests, and feedback are greatly welcomed.
155
+ Lucene can even [combine similarity measures](http://lucene.apache.org/core/4_0_0/core/org/apache/lucene/search/similarities/MultiSimilarity.html).
130
156
 
131
- Copyright (c) 2012 Open North Inc., released under the MIT license
157
+ Copyright (c) 2012 James McKinney, released under the MIT license
@@ -1,12 +1,8 @@
1
1
  require 'forwardable'
2
2
  require 'set'
3
3
 
4
- begin
5
- require 'unicode_utils/downcase'
6
- require 'unicode_utils/each_word'
7
- rescue LoadError
8
- # Ruby 1.8
9
- end
4
+ require 'unicode_utils/downcase'
5
+ require 'unicode_utils/each_word'
10
6
 
11
7
  module TfIdfSimilarity
12
8
  end
@@ -1,7 +1,7 @@
1
1
  # A document-term matrix using the BM25 function.
2
2
  #
3
3
  # @see http://lucene.apache.org/core/4_0_0-BETA/core/org/apache/lucene/search/similarities/BM25Similarity.html
4
- # @see http://en.wikipedia.org/wiki/Okapi_BM25
4
+ # @see https://en.wikipedia.org/wiki/Okapi_BM25
5
5
  module TfIdfSimilarity
6
6
  class BM25Model < Model
7
7
  # Return the term's inverse document frequency.
@@ -76,7 +76,7 @@ module TfIdfSimilarity
76
76
  # @see http://unicode.org/reports/tr29/#Default_Word_Boundaries
77
77
  # @see http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters#solr.StandardTokenizerFactory
78
78
  def tokenize(text)
79
- @tokens || defined?(UnicodeUtils) && UnicodeUtils.each_word(text) || text.split(/\b/) # @todo Ruby 1.8 has no good word boundary code
79
+ @tokens || UnicodeUtils.each_word(text)
80
80
  end
81
81
  end
82
82
  end
@@ -110,7 +110,7 @@ module TfIdfSimilarity
110
110
  end
111
111
  alias_method :binary_tf, :binary_term_frequency
112
112
 
113
- # @see http://en.wikipedia.org/wiki/Tf*idf
113
+ # @see https://en.wikipedia.org/wiki/Tf*idf
114
114
  # @see http://nlp.stanford.edu/IR-book/html/htmledition/maximum-tf-normalization-1.html
115
115
  def normalized_term_frequency(document, term, a = 0)
116
116
  a + (1 - a) * document.term_count(term) / document.maximum_term_count
@@ -34,7 +34,7 @@ module TfIdfSimilarity
34
34
  elsif column.respond_to?(:normalize)
35
35
  column.normalize
36
36
  else
37
- column * (1 / Math.sqrt(column.inner_product(column))) # 1.8 does define division
37
+ column / Math.sqrt(column.inner_product(column))
38
38
  end
39
39
  end)
40
40
  end
@@ -9,7 +9,7 @@
9
9
  # @see http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters#solr.WordDelimiterFilterFactory
10
10
  # @see http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters#solr.SynonymFilterFactory
11
11
  module TfIdfSimilarity
12
- class Token < String
12
+ class Token < SimpleDelegator
13
13
  # Returns a falsy value if all its characters are numbers, punctuation,
14
14
  # whitespace or control characters.
15
15
  #
@@ -35,10 +35,7 @@ module TfIdfSimilarity
35
35
  #
36
36
  # @see http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters#solr.LowerCaseFilterFactory
37
37
  def lowercase_filter
38
- self.class.new(defined?(UnicodeUtils) ? UnicodeUtils.downcase(self) : tr(
39
- "ÀÁÂÃÄÅĀĂĄÇĆĈĊČÐĎĐÈÉÊËĒĔĖĘĚĜĞĠĢĤĦÌÍÎÏĨĪĬĮĴĶĹĻĽĿŁÑŃŅŇŊÒÓÔÕÖØŌŎŐŔŖŘŚŜŞŠŢŤŦÙÚÛÜŨŪŬŮŰŲŴÝŶŸŹŻŽ",
40
- "àáâãäåāăąçćĉċčðďđèéêëēĕėęěĝğġģĥħìíîïĩīĭįĵķĺļľŀłñńņňŋòóôõöøōŏőŕŗřśŝşšţťŧùúûüũūŭůűųŵýŷÿźżž"
41
- ).downcase)
38
+ self.class.new(UnicodeUtils.downcase(self))
42
39
  end
43
40
 
44
41
  # Returns a string with no English possessive or periods in acronyms.
@@ -47,7 +44,7 @@ module TfIdfSimilarity
47
44
  #
48
45
  # @see http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters#solr.ClassicFilterFactory
49
46
  def classic_filter
50
- self.class.new(self.gsub('.', '').chomp("'s"))
47
+ self.class.new(self.gsub('.', '').sub(/['`’]s\z/, ''))
51
48
  end
52
49
  end
53
50
  end
@@ -1,3 +1,3 @@
1
1
  module TfIdfSimilarity
2
- VERSION = "0.1.4"
2
+ VERSION = "0.1.5"
3
3
  end
@@ -33,7 +33,7 @@ module TfIdfSimilarity
33
33
  build_model(documents)
34
34
  end
35
35
 
36
- pending "Add #search"
36
+ skip "Add #search"
37
37
  end
38
38
 
39
39
  # @see https://github.com/bbcrd/Similarity/blob/master/test/test_corpus.rb
@@ -98,7 +98,7 @@ module TfIdfSimilarity
98
98
  end
99
99
 
100
100
  it 'should return the similarity matrix' do
101
- pending "Calculate the tf*idf matrix like the similarity gem does"
101
+ skip "Calculate the tf*idf matrix like the similarity gem does"
102
102
  end
103
103
 
104
104
  it 'should return the number of documents in which a term appears' do
@@ -113,7 +113,7 @@ module TfIdfSimilarity
113
113
  end
114
114
 
115
115
  it 'should return the document vector' do
116
- pending "Calculate the tf*idf matrix like the similarity gem does"
116
+ skip "Calculate the tf*idf matrix like the similarity gem does"
117
117
  end
118
118
  end
119
119
 
@@ -1,7 +1,11 @@
1
1
  require 'rubygems'
2
2
 
3
+ require 'simplecov'
3
4
  require 'coveralls'
4
- Coveralls.wear!
5
+ SimpleCov.formatter = Coveralls::SimpleCov::Formatter
6
+ SimpleCov.start do
7
+ add_filter 'spec'
8
+ end
5
9
 
6
10
  require 'rspec'
7
11
  require File.dirname(__FILE__) + '/../lib/tf-idf-similarity'
@@ -28,6 +28,14 @@ module TfIdfSimilarity
28
28
  Token.new("foo's").classic_filter.should == 'foo'
29
29
  end
30
30
 
31
+ it 'should remove ending possessives with nonstandard apostrophe 1' do
32
+ Token.new("foo`s").classic_filter.should == 'foo'
33
+ end
34
+
35
+ it 'should remove ending possessives with nonstandard apostrophe 2' do
36
+ Token.new("foo’s").classic_filter.should == 'foo'
37
+ end
38
+
31
39
  it 'should not remove infix possessives' do
32
40
  Token.new("foo's bar").classic_filter.should == "foo's bar"
33
41
  end
@@ -5,9 +5,8 @@ Gem::Specification.new do |s|
5
5
  s.name = "tf-idf-similarity"
6
6
  s.version = TfIdfSimilarity::VERSION
7
7
  s.platform = Gem::Platform::RUBY
8
- s.authors = ["Open North"]
9
- s.email = ["info@opennorth.ca"]
10
- s.homepage = "http://github.com/opennorth/tf-idf-similarity"
8
+ s.authors = ["James McKinney"]
9
+ s.homepage = "https://github.com/jpmckinney/tf-idf-similarity"
11
10
  s.summary = %q{Calculates the similarity between texts using tf*idf}
12
11
  s.license = 'MIT'
13
12
 
@@ -16,10 +15,9 @@ Gem::Specification.new do |s|
16
15
  s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
17
16
  s.require_paths = ["lib"]
18
17
 
19
- s.add_development_dependency('rspec', '~> 2.10')
20
- s.add_development_dependency('rake')
21
- s.add_development_dependency('coveralls')
22
- s.add_development_dependency('mime-types', '~> 1.25') # 2.0 requires Ruby 1.9.2
18
+ s.add_runtime_dependency('unicode_utils', '~> 1.4')
23
19
 
24
- s.extensions = ['ext/mkrf_conf.rb']
20
+ s.add_development_dependency('coveralls')
21
+ s.add_development_dependency('rake')
22
+ s.add_development_dependency('rspec', '~> 2.10')
25
23
  end
metadata CHANGED
@@ -1,31 +1,31 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tf-idf-similarity
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
- - Open North
7
+ - James McKinney
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-10-11 00:00:00.000000000 Z
11
+ date: 2016-01-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: rspec
14
+ name: unicode_utils
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '2.10'
20
- type: :development
19
+ version: '1.4'
20
+ type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '2.10'
26
+ version: '1.4'
27
27
  - !ruby/object:Gem::Dependency
28
- name: rake
28
+ name: coveralls
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - ">="
@@ -39,7 +39,7 @@ dependencies:
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
41
  - !ruby/object:Gem::Dependency
42
- name: coveralls
42
+ name: rake
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - ">="
@@ -53,36 +53,33 @@ dependencies:
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
55
  - !ruby/object:Gem::Dependency
56
- name: mime-types
56
+ name: rspec
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
59
  - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: '1.25'
61
+ version: '2.10'
62
62
  type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
- version: '1.25'
68
+ version: '2.10'
69
69
  description:
70
- email:
71
- - info@opennorth.ca
70
+ email:
72
71
  executables: []
73
- extensions:
74
- - ext/mkrf_conf.rb
72
+ extensions: []
75
73
  extra_rdoc_files: []
76
74
  files:
77
75
  - ".gitignore"
76
+ - ".rspec"
78
77
  - ".travis.yml"
79
78
  - ".yardopts"
80
79
  - Gemfile
81
80
  - LICENSE
82
81
  - README.md
83
82
  - Rakefile
84
- - USAGE
85
- - ext/mkrf_conf.rb
86
83
  - lib/tf-idf-similarity.rb
87
84
  - lib/tf-idf-similarity/bm25_model.rb
88
85
  - lib/tf-idf-similarity/document.rb
@@ -102,7 +99,7 @@ files:
102
99
  - spec/tf_idf_model_spec.rb
103
100
  - spec/token_spec.rb
104
101
  - td-idf-similarity.gemspec
105
- homepage: http://github.com/opennorth/tf-idf-similarity
102
+ homepage: https://github.com/jpmckinney/tf-idf-similarity
106
103
  licenses:
107
104
  - MIT
108
105
  metadata: {}
@@ -122,7 +119,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
122
119
  version: '0'
123
120
  requirements: []
124
121
  rubyforge_project:
125
- rubygems_version: 2.2.2
122
+ rubygems_version: 2.4.5
126
123
  signing_key:
127
124
  specification_version: 4
128
125
  summary: Calculates the similarity between texts using tf*idf
data/USAGE DELETED
@@ -1 +0,0 @@
1
- See README.md for full usage details.
@@ -1,15 +0,0 @@
1
- # @see http://www.programmersparadox.com/2012/05/21/gemspec-loading-dependent-gems-based-on-the-users-system/
2
- require 'rubygems/dependency_installer.rb'
3
-
4
- installer = Gem::DependencyInstaller.new
5
- begin
6
- unless RUBY_VERSION < '1.9'
7
- installer.install('unicode_utils', '>=0')
8
- end
9
- rescue
10
- exit(1)
11
- end
12
-
13
- f = File.open(File.join(File.dirname(__FILE__), "Rakefile"), "w")
14
- f.write("task :default\n")
15
- f.close