middleman-blog-similar 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +14 -3
- data/CHANGELOG.md +5 -0
- data/Gemfile +6 -0
- data/README.md +63 -19
- data/features/damerau_levenshtein.feature +5 -5
- data/features/levenshtein.feature +5 -5
- data/features/word_frequency.feature +15 -0
- data/fixtures/test-app/source/2014-05-08-article0.md +1 -0
- data/fixtures/test-app/source/2014-05-10-article2.md +2 -1
- data/fixtures/test-app/source/2014-05-11-article3.md +2 -1
- data/fixtures/test-app/source/2014-05-14-article6.md +1 -1
- data/fixtures/test-app/source/layouts/article.slim +2 -2
- data/lib/middleman-blog-similar/algorithm.rb +19 -0
- data/lib/middleman-blog-similar/algorithm/damerau_levenshtein.rb +7 -0
- data/lib/middleman-blog-similar/algorithm/levenshtein.rb +7 -0
- data/lib/middleman-blog-similar/algorithm/unigrams.csv +21089 -0
- data/lib/middleman-blog-similar/algorithm/word_frequency.rb +69 -0
- data/lib/middleman-blog-similar/algorithm/word_frequency/mecab.rb +20 -0
- data/lib/middleman-blog-similar/algorithm/word_frequency/tree_tagger.rb +20 -0
- data/lib/middleman-blog-similar/blog_article_extensions.rb +20 -11
- data/lib/middleman-blog-similar/extension.rb +10 -7
- data/lib/middleman-blog-similar/helpers.rb +6 -12
- data/lib/middleman-blog-similar/version.rb +1 -1
- data/middleman-blog-similar.gemspec +3 -2
- data/spec/middleman-blog-similar/algorithm/damerau_levenshtein_spec.rb +42 -0
- data/spec/middleman-blog-similar/algorithm/levenshtein_spec.rb +42 -0
- data/spec/middleman-blog-similar/algorithm/word_frequency/mecab_spec.rb +41 -0
- data/spec/middleman-blog-similar/algorithm/word_frequency/tree_tagger_spec.rb +52 -0
- data/spec/middleman-blog-similar/algorithm/word_frequency_spec.rb +73 -0
- data/spec/middleman-blog-similar/algorithm_spec.rb +40 -0
- data/spec/spec_helper.rb +26 -2
- metadata +37 -7
- data/lib/middleman-blog-similar/engines/base.rb +0 -27
- data/lib/middleman-blog-similar/engines/damerau_levenshtein.rb +0 -15
- data/lib/middleman-blog-similar/engines/levenshtein.rb +0 -15
- data/spec/middleman-blog-similar/engines/base_spec.rb +0 -4
@@ -0,0 +1,69 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'fast-stemmer'
|
4
|
+
require 'csv'
|
5
|
+
|
6
|
+
# logic ported from https://plugins.trac.wordpress.org/browser/wordpress-23-related-posts-plugin/trunk/recommendations.php
|
7
|
+
|
8
|
+
# Similarity algorithm based on shared word counts.
#
# Each article is reduced to a hash of downcased (and, for plain \w+ tokens,
# stemmed) word => count, where every article tag adds +tag_weight+ to its
# entry. +distance+ gets smaller the more weighted words two articles share.
class Middleman::Blog::Similar::Algorithm::WordFrequency < ::Middleman::Blog::Similar::Algorithm
  # Cache for the unigram table. A class variable, so subclasses share it.
  @@unigrams = nil

  class << self
    # Path of the tab-separated unigram table located next to this file.
    def unigrams_path
      File.join File.dirname(__FILE__), 'unigrams.csv'
    end

    # Lazily loads and caches the unigram table.
    #
    # Only rows with exactly five columns are kept: the first column is the
    # key, the fifth column (converted with +to_f+) is the value.
    #
    # @return [Hash{String => Float}]
    def unigrams
      if @@unigrams.nil?
        table = {}
        # Options are passed as keywords: on Ruby 3 a positional Hash would be
        # taken as CSV.foreach's +mode+ argument instead of its options.
        CSV.foreach(unigrams_path, :col_sep => "\t") do |row|
          table[row[0]] = row[4].to_f if row.length == 5
        end
        # Assign only after a complete read so a failed load is retried later
        # instead of leaving an empty table cached.
        @@unigrams = table
      end
      @@unigrams
    end
  end

  # Distance between this article and +a+; smaller means more similar.
  #
  # Starts from 0xffffff and subtracts, for every word both articles contain,
  # the product of the two counts.
  #
  # @param a an article responding to +similarity_algorithm+, whose result
  #   responds to +word_freq+
  # @return [Numeric]
  def distance a
    d = 0xffffff
    wf = a.similarity_algorithm.word_freq
    word_freq.each do |word, freq|
      d -= wf[word] * freq if wf.has_key? word
    end
    d
  end

  # Raw tokens of the article: tag-stripped body plus title, split on
  # whitespace and the punctuation listed in the pattern.
  #
  # @return [Array<String>]
  def words
    re = /[\t\s\n,\.、。 ]/
    article.untagged_body.split(re) + article.title.split(re)
  end

  # Builds the word => count hash for the article.
  #
  # @return [Hash{String => Integer}] ordered by the two chained sorts below
  def generate_word_freq
    word_freq = {}
    words.each do |word|
      word = word.downcase
      word = word.stem if word =~ /^\w+$/
      word_freq[word] ||= 0
      word_freq[word] += 1
    end
    article.tags.each do |tag|
      tag = tag.downcase.stem
      word_freq[tag] ||= 0
      word_freq[tag] += tag_weight
    end
    # NOTE(review): the two chained sort_by calls are kept exactly as they
    # were. Ruby does not promise a stable sort_by, so the order of entries
    # with equal counts is whatever the second sort yields; callers (tags)
    # rely on the resulting order.
    Hash[ word_freq.sort_by{|k, v| k }.sort_by{|k, v| v } ]
  end

  # Memoized result of +generate_word_freq+.
  def word_freq
    @word_freq ||= generate_word_freq
  end

  # Words of the article in reverse +word_freq+ order (highest count first).
  #
  # Reuses the memoized +word_freq+ so that +words+ (which spawns an external
  # process in some subclasses) is not evaluated a second time.
  def generate_tags
    word_freq.keys.reverse
  end

  # Memoized result of +generate_tags+.
  def tags
    @tags ||= generate_tags
  end

  # Count added to a word's entry for each article tag equal to it.
  def tag_weight ; 5 ; end

  # Instance-level shortcut to the class-level unigram table.
  def unigrams ; self.class.unigrams ; end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# -*- coding: UTF-8 -*-
|
2
|
+
|
3
|
+
require 'middleman-blog-similar/algorithm/word_frequency'
|
4
|
+
|
5
|
+
# WordFrequency variant that takes its word list from the +mecab+ command.
class Middleman::Blog::Similar::Algorithm::WordFrequency::Mecab < ::Middleman::Blog::Similar::Algorithm::WordFrequency
  # Declared for callers; nothing in this class raises it.
  class CommandNotFound < StandardError; end

  # Feeds the tag-stripped body and the title to +mecab+ and collects the
  # first column of every output line whose second column starts with '名詞'.
  #
  # NOTE(review): all input is written before any output is read, and the
  # command's stderr is discarded, so a missing +mecab+ presumably just
  # yields an empty list — confirm.
  #
  # @return [Array<String>]
  def words
    IO.popen("mecab 2>/dev/null", 'r+') do |io|
      io.puts article.untagged_body
      io.puts article.title
      io.close_write
      io.each_line.each_with_object([]) do |line, nouns|
        surface, pos = line.split(/[\t\s]+/)
        nouns << surface if pos && pos.start_with?('名詞')
      end
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'middleman-blog-similar/algorithm/word_frequency'
|
2
|
+
|
3
|
+
# WordFrequency variant that takes its word list from the command named in
# ENV['TREETAGGER_COMMAND'].
class Middleman::Blog::Similar::Algorithm::WordFrequency::TreeTagger < ::Middleman::Blog::Similar::Algorithm::WordFrequency
  # Raised by +words+ when ENV['TREETAGGER_COMMAND'] is not set.
  class CommandNotFound < StandardError; end

  # Pipes the tag-stripped body and the title through the configured command
  # and keeps the first column of every output line whose second column is
  # one of the accepted part-of-speech tags.
  #
  # @return [Array<String>]
  # @raise [CommandNotFound] when ENV['TREETAGGER_COMMAND'] is not set
  def words
    raise CommandNotFound.new "You need to tree tagger command with ENV['TREETAGGER_COMMAND']" unless ENV['TREETAGGER_COMMAND']
    res = []
    IO.popen("#{ ENV['TREETAGGER_COMMAND'] } 2>/dev/null", 'r+') {|f|
      f.puts article.untagged_body
      f.puts article.title
      f.close_write
      while line = f.gets
        word, pos = line.split(/\s+/)
        # http://courses.washington.edu/hypertxt/csar-v02/penntable.html
        # A line with fewer than two tokens (e.g. a blank line) leaves pos
        # nil; skip it instead of raising NoMethodError.
        # NOTE(review): pos[0..2] takes up to three characters, so only the
        # exact tags NN, JJ and NP match (NNS, JJR, ... do not) — confirm
        # this is intended.
        res << word if pos && %w{NN JJ NP}.include?(pos[0..2])
      end
    }
    res
  end
end
|
@@ -1,14 +1,23 @@
|
|
1
|
-
module Middleman
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
def similar_articles
|
6
|
-
if !@similar_articles && (engine = app.similarity_engine)
|
7
|
-
@similar_articles = engine.new(self).similar_articles
|
8
|
-
end
|
9
|
-
@similar_articles || []
|
10
|
-
end
|
11
|
-
end
|
1
|
+
# Methods mixed into sitemap resources so that an article can expose its
# similar articles through the configured similarity algorithm.
module Middleman::Blog::Similar::BlogArticleExtensions
  # Articles similar to this one, as computed by the similarity algorithm.
  #
  # @return [Array] memoized result; an empty array when no algorithm is
  #   available
  def similar_articles
    if !@similar_articles && similarity_algorithm
      @similar_articles = similarity_algorithm.similar_articles
    end
    @similar_articles || []
  end

  # Words of this article as extracted by the similarity algorithm.
  #
  # The guard mirrors +similar_articles+: the algorithm is consulted only
  # when nothing is cached yet AND an algorithm exists. (The previous
  # `unless @words && similarity_algorithm` called +words+ on nil when no
  # algorithm was configured.)
  #
  # @return the memoized word list, or nil when no algorithm is available
  def words
    if !@words && similarity_algorithm
      @words = similarity_algorithm.words
    end
    @words
  end

  # Instance of the algorithm class held in +app.similarity_algorithm+,
  # built for this resource on first use and memoized.
  #
  # @return the algorithm instance, or nil when the app setting is falsy
  def similarity_algorithm
    if !@similarity_algorithm && (algorithm = app.similarity_algorithm)
      @similarity_algorithm = algorithm.new self
    end
    @similarity_algorithm
  end

  # Body with everything of the form <...> removed.
  def untagged_body
    body.gsub(/<[^>]*>/ui,'')
  end
end
|
@@ -1,25 +1,28 @@
|
|
1
1
|
require 'middleman-blog-similar/blog_article_extensions'
|
2
2
|
require 'middleman-blog-similar/helpers'
|
3
|
-
require 'middleman-blog-similar/
|
3
|
+
require 'middleman-blog-similar/algorithm'
|
4
4
|
|
5
5
|
module Middleman
|
6
6
|
module Blog
|
7
7
|
class SimilarExtension < ::Middleman::Extension
|
8
8
|
|
9
|
-
option :
|
9
|
+
option :algorithm, :word_frequency, 'Similar lookup algorithm'
|
10
10
|
|
11
11
|
self.defined_helpers = [ Middleman::Blog::Similar::Helpers ]
|
12
12
|
|
13
13
|
# Mixes the article extensions into sitemap resources, then loads the
# algorithm named by the :algorithm option and stores its class in the
# app setting :similarity_algorithm.
#
# The option may contain slashes (e.g. 'word_frequency/mecab'); each segment
# is camelized and resolved as a constant nested under
# ::Middleman::Blog::Similar::Algorithm.
#
# A LoadError from requiring the algorithm file is logged and re-raised.
def after_configuration
  require 'middleman-blog/blog_article'
  ::Middleman::Sitemap::Resource.send :include, Middleman::Blog::Similar::BlogArticleExtensions
  algorithm = options[:algorithm].to_s
  begin
    require "middleman-blog-similar/algorithm/#{algorithm}"
    # Walk down the namespace one path segment at a time.
    klass = algorithm.split('/').inject(::Middleman::Blog::Similar::Algorithm) do |scope, segment|
      scope.const_get segment.camelize
    end
    app.set :similarity_algorithm, klass
  rescue LoadError => e
    app.logger.error "Requested similar algorithm '#{algorithm}' not found."
    raise e
  end
end
|
@@ -1,15 +1,9 @@
|
|
1
|
-
module Middleman
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
current_article.similar_articles
|
8
|
-
else
|
9
|
-
[]
|
10
|
-
end
|
11
|
-
end
|
12
|
-
end
|
1
|
+
# Template helpers registered by the similar extension.
module Middleman::Blog::Similar::Helpers
  # Similar articles of the article currently being rendered.
  #
  # Relies on +is_blog_article?+ and +current_article+, which are not defined
  # in this module (presumably provided by middleman-blog — confirm).
  #
  # @return [Array] empty when the current page is not a blog article
  def similar_articles
    return [] unless is_blog_article?

    current_article.similar_articles
  end
end
|
@@ -15,6 +15,7 @@ Gem::Specification.new do |s|
|
|
15
15
|
s.files = `git ls-files -z`.split("\0")
|
16
16
|
s.test_files = `git ls-files -z -- {fixtures,features,spec}/*`.split("\0")
|
17
17
|
s.require_paths = ["lib"]
|
18
|
-
s.add_runtime_dependency("middleman-core", ["~> 3.2"])
|
19
|
-
s.add_runtime_dependency("middleman-blog", ["~> 3.5"])
|
18
|
+
s.add_runtime_dependency("middleman-core", ["~> 3.2" ])
|
19
|
+
s.add_runtime_dependency("middleman-blog", ["~> 3.5" ])
|
20
|
+
s.add_runtime_dependency("fast-stemmer", ["~> 1.0.2"])
|
20
21
|
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'middleman-blog-similar/algorithm/damerau_levenshtein'
|
3
|
+
|
4
|
+
# Checks that activating :similar with :damerau_levenshtein selects the
# DamerauLevenshtein algorithm and pins the similar-article order for article0.
describe 'Middleman::Blog::Similar::Algorithm::DamerauLevenshtein' do
  let(:app) do
    middleman_app('test-app') do
      activate :similar, :algorithm => :damerau_levenshtein
    end
  end
  let(:article) { app.sitemap.find_resource_by_destination_path '/2014/05/08/article0.html' }
  let(:algorithm) { article.similarity_algorithm }

  describe(:app) do
    describe(:similarity_algorithm) do
      subject { app.similarity_algorithm }
      it { should be ::Middleman::Blog::Similar::Algorithm::DamerauLevenshtein }
    end
  end

  describe(:similarity_algorithm) do
    subject { algorithm }
    it { should be_a_kind_of ::Middleman::Blog::Similar::Algorithm::DamerauLevenshtein }

    describe(:similar_articles) do
      subject { algorithm.similar_articles.map(&:url) }
      it do
        should eq %w[
          /2014/05/13/article5.html
          /2014/05/09/article1.html
          /2014/05/12/article4.html
          /2014/05/14/article6.html
          /2014/05/10/article2.html
          /2014/05/11/article3.html
        ]
      end
    end
  end

  describe(:article) do
    describe(:similarity_algorithm) do
      subject { algorithm }
      it { should be_a_kind_of ::Middleman::Blog::Similar::Algorithm::DamerauLevenshtein }
    end
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'middleman-blog-similar/algorithm/levenshtein'
|
3
|
+
|
4
|
+
# Checks that activating :similar with :levenshtein selects the Levenshtein
# algorithm and pins the similar-article order for article0.
describe 'Middleman::Blog::Similar::Algorithm::Levenshtein' do
  let(:app) do
    middleman_app('test-app') do
      activate :similar, :algorithm => :levenshtein
    end
  end
  let(:article) { app.sitemap.find_resource_by_destination_path '/2014/05/08/article0.html' }
  let(:algorithm) { article.similarity_algorithm }

  describe(:app) do
    describe(:similarity_algorithm) do
      subject { app.similarity_algorithm }
      it { should be ::Middleman::Blog::Similar::Algorithm::Levenshtein }
    end
  end

  describe(:similarity_algorithm) do
    subject { algorithm }
    it { should be_a_kind_of ::Middleman::Blog::Similar::Algorithm::Levenshtein }

    describe(:similar_articles) do
      subject { algorithm.similar_articles.map(&:url) }
      it do
        should eq %w[
          /2014/05/13/article5.html
          /2014/05/09/article1.html
          /2014/05/12/article4.html
          /2014/05/14/article6.html
          /2014/05/10/article2.html
          /2014/05/11/article3.html
        ]
      end
    end
  end

  describe(:article) do
    describe(:similarity_algorithm) do
      subject { algorithm }
      it { should be_a_kind_of ::Middleman::Blog::Similar::Algorithm::Levenshtein }
    end
  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
# -*- coding: UTF-8 -*-
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
require 'middleman-blog-similar/algorithm/word_frequency/mecab'
|
5
|
+
|
6
|
+
# Checks that activating :similar with :'word_frequency/mecab' selects the
# Mecab algorithm and, when the mecab command is available, pins the tags
# produced for article3.
describe 'Middleman::Blog::Similar::Algorithm::WordFrequency::Mecab' do
  let(:app) {
    middleman_app('test-app') {
      activate :similar, :algorithm => :'word_frequency/mecab'
    }
  }
  let(:article) { app.sitemap.find_resource_by_destination_path '/2014/05/11/article3.html' }
  let(:algorithm) { article.similarity_algorithm }
  describe(:app) {
    describe(:similarity_algorithm) {
      subject { app.similarity_algorithm }
      it { should be ::Middleman::Blog::Similar::Algorithm::WordFrequency::Mecab }
    }
  }
  describe(:similarity_algorithm) {
    subject { algorithm }
    it { should be_a_kind_of ::Middleman::Blog::Similar::Algorithm::WordFrequency::Mecab }
  }
  describe(:tags) {
    describe(:output) {
      # Test the exit status of `which`: %x{...} returns a String (possibly
      # empty), which is always truthy, so the pending branch could never run.
      if system('which mecab > /dev/null 2>&1')
        subject { algorithm.tags }
        it { should eq ["fox","の","国家","さん","方","誰","私","坊ちゃん","立脚","西洋","矛盾","相違","発会","時分","昨日","講演","教師","拡張","悪口","尻","変","結果","開始","周旋","向","叫び","反駁","反抗","前","人間","ネルソン","よう","関係","なん","ため","それ","そう","院","この世","お話","隙","expect","articl","3"] }
      else
        pending "mecab is not installed."
      end
    }
  }
  describe(:article) {
    describe(:similarity_algorithm) {
      subject { algorithm }
      it { should be_a_kind_of ::Middleman::Blog::Similar::Algorithm::WordFrequency::Mecab }
    }
  }

end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'middleman-blog-similar/algorithm/word_frequency/tree_tagger'
|
3
|
+
|
4
|
+
# Checks that activating :similar with :'word_frequency/tree_tagger' selects
# the TreeTagger algorithm, pins the tags for article2 when
# ENV['TREETAGGER_COMMAND'] is set, and expects CommandNotFound when it is not.
describe 'Middleman::Blog::Similar::Algorithm::WordFrequency::TreeTagger' do
  let(:app) do
    middleman_app('test-app') do
      activate :similar, :algorithm => :'word_frequency/tree_tagger'
    end
  end
  let(:article) { app.sitemap.find_resource_by_destination_path '/2014/05/10/article2.html' }
  let(:algorithm) { article.similarity_algorithm }

  describe(:app) do
    describe(:similarity_algorithm) do
      subject { app.similarity_algorithm }
      it { should be ::Middleman::Blog::Similar::Algorithm::WordFrequency::TreeTagger }
    end
  end

  describe(:similarity_algorithm) do
    subject { algorithm }
    it { should be_a_kind_of ::Middleman::Blog::Similar::Algorithm::WordFrequency::TreeTagger }
  end

  describe(:tags) do
    describe(:output) do
      # Decided once, when the spec file is loaded.
      if ENV['TREETAGGER_COMMAND']
        subject { algorithm.tags }
        it { should eq ["quick", "fox", "dog", "brown", "lazi", "articl"] }
      else
        pending "ENV['TREETAGGER_COMMAND'] not set."
      end
    end

    context('if command path is not set') do
      subject { -> { algorithm.tags } }

      # Clear the variable for the example and put it back afterwards.
      before do
        @cmd = ENV['TREETAGGER_COMMAND']
        ENV['TREETAGGER_COMMAND'] = nil
      end
      after do
        ENV['TREETAGGER_COMMAND'] = @cmd if @cmd
      end

      describe('raises error') do
        it { should raise_error Middleman::Blog::Similar::Algorithm::WordFrequency::TreeTagger::CommandNotFound }
      end
    end
  end

  describe(:article) do
    describe(:similarity_algorithm) do
      subject { algorithm }
      it { should be_a_kind_of ::Middleman::Blog::Similar::Algorithm::WordFrequency::TreeTagger }
    end
  end
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'middleman-blog-similar/algorithm/word_frequency'
|
3
|
+
|
4
|
+
# Checks that activating :similar with :word_frequency selects the
# WordFrequency algorithm and pins, for article2, the unigram table size,
# the similar-article order, the tags and the word counts.
describe 'Middleman::Blog::Similar::Algorithm::WordFrequency' do
  let(:app) do
    middleman_app('test-app') do
      activate :similar, :algorithm => :word_frequency
    end
  end
  let(:article) { app.sitemap.find_resource_by_destination_path '/2014/05/10/article2.html' }
  let(:algorithm) { article.similarity_algorithm }

  describe(:app) do
    describe(:similarity_algorithm) do
      subject { app.similarity_algorithm }
      it { should be ::Middleman::Blog::Similar::Algorithm::WordFrequency }
    end
  end

  describe(:algorithm) do
    subject { algorithm }
    it { should be_a_kind_of ::Middleman::Blog::Similar::Algorithm::WordFrequency }
  end

  describe(:unigrams) do
    describe('length of keys') do
      subject { algorithm.unigrams.keys.length }
      it { should be 21089 }
    end

    describe('class') do
      subject { algorithm.unigrams }
      it { should be_a_kind_of Hash }
    end
  end

  describe(:similar_articles) do
    subject { algorithm.similar_articles.map(&:url) }
    it do
      should eq %w[
        /2014/05/11/article3.html
        /2014/05/08/article0.html
        /2014/05/12/article4.html
        /2014/05/13/article5.html
        /2014/05/09/article1.html
        /2014/05/14/article6.html
      ]
    end
  end

  describe(:tags) do
    subject { algorithm.tags }
    it { should eq %w[fox quick dog brown the jump lazi over articl 2] }
  end

  describe(:word_freq) do
    subject { algorithm.word_freq }
    it do
      should eq({
        "brown"  => 2,
        "dog"    => 3,
        "fox"    => 6,
        "jump"   => 1,
        "lazi"   => 1,
        "over"   => 1,
        "quick"  => 6,
        "the"    => 2,
        "2"      => 1,
        "articl" => 1
      })
    end
  end

  describe(:article) do
    describe(:similarity_algorithm) do
      subject { algorithm }
      it { should be_a_kind_of ::Middleman::Blog::Similar::Algorithm::WordFrequency }
    end
  end
end
|