ruigi 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: d2ffce2571016ff1adbbfa916134e90968bca826
4
+ data.tar.gz: 200149a6cd2f74c1dd178698cfa624adfe11eb12
5
+ SHA512:
6
+ metadata.gz: af93a13deeef93ff520de19052b15dc42a80658d90ceeb4b9be94b31390d0d2a7a257a91bf25705f99a70bd8d36b9d7f8ba3190539c63bb27f95e1781e1a2a3c
7
+ data.tar.gz: db45474afcaf7e6a1ec626d2df65f25d56646d43523454e73611fa39eb78ab6dd14022de9bb35ea80bc50a48c4805412a268ba9226558670f058002ed3b59d5e
@@ -0,0 +1,10 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ /vendor/bundle
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
@@ -0,0 +1,5 @@
1
+ sudo: false
2
+ language: ruby
3
+ rvm:
4
+ - 2.3.1
5
+ before_install: gem install bundler -v 1.12.5
@@ -0,0 +1,49 @@
1
+ # Contributor Code of Conduct
2
+
3
+ As contributors and maintainers of this project, and in the interest of
4
+ fostering an open and welcoming community, we pledge to respect all people who
5
+ contribute through reporting issues, posting feature requests, updating
6
+ documentation, submitting pull requests or patches, and other activities.
7
+
8
+ We are committed to making participation in this project a harassment-free
9
+ experience for everyone, regardless of level of experience, gender, gender
10
+ identity and expression, sexual orientation, disability, personal appearance,
11
+ body size, race, ethnicity, age, religion, or nationality.
12
+
13
+ Examples of unacceptable behavior by participants include:
14
+
15
+ * The use of sexualized language or imagery
16
+ * Personal attacks
17
+ * Trolling or insulting/derogatory comments
18
+ * Public or private harassment
19
+ * Publishing other's private information, such as physical or electronic
20
+ addresses, without explicit permission
21
+ * Other unethical or unprofessional conduct
22
+
23
+ Project maintainers have the right and responsibility to remove, edit, or
24
+ reject comments, commits, code, wiki edits, issues, and other contributions
25
+ that are not aligned to this Code of Conduct, or to ban temporarily or
26
+ permanently any contributor for other behaviors that they deem inappropriate,
27
+ threatening, offensive, or harmful.
28
+
29
+ By adopting this Code of Conduct, project maintainers commit themselves to
30
+ fairly and consistently applying these principles to every aspect of managing
31
+ this project. Project maintainers who do not follow or enforce the Code of
32
+ Conduct may be permanently removed from the project team.
33
+
34
+ This code of conduct applies both within project spaces and in public spaces
35
+ when an individual is representing the project or its community.
36
+
37
+ Instances of abusive, harassing, or otherwise unacceptable behavior may be
38
+ reported by contacting a project maintainer at k12naoki@gmail.com. All
39
+ complaints will be reviewed and investigated and will result in a response that
40
+ is deemed necessary and appropriate to the circumstances. Maintainers are
41
+ obligated to maintain confidentiality with regard to the reporter of an
42
+ incident.
43
+
44
+ This Code of Conduct is adapted from the [Contributor Covenant][homepage],
45
+ version 1.3.0, available at
46
+ [http://contributor-covenant.org/version/1/3/0/][version]
47
+
48
+ [homepage]: http://contributor-covenant.org
49
+ [version]: http://contributor-covenant.org/version/1/3/0/
data/Gemfile ADDED
@@ -0,0 +1,7 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gem 'stackprof' # extra tool listed only here, not in ruigi.gemspec
4
+ gem 'pry' # extra tool listed only here, not in ruigi.gemspec
5
+ gem 'pry-byebug' # extra tool listed only here, not in ruigi.gemspec
6
+ # Specify your gem's dependencies in ruigi.gemspec
7
+ gemspec
@@ -0,0 +1,49 @@
1
+ # Ruigi
2
+ [![wercker status](https://app.wercker.com/status/67c3adba6bb11fcb230401bd76d4911d/s/master "wercker status")](https://app.wercker.com/project/byKey/67c3adba6bb11fcb230401bd76d4911d)
3
+
4
+ ## Ruigi = Ruby + Luigi
5
+ It is said that Luigi's name comes from his __similarity__ (called "ruigi" in Japanese) to Mario.
6
+ Ruigi is a __similarity__ calculation library implemented in Ruby.
7
+
8
+ ### algorithms
9
+ Currently, only TF-IDF and cosine similarity are supported.
10
+
11
+ ## Installation
12
+
13
+ Add this line to your application's Gemfile:
14
+
15
+ ```ruby
16
+ gem 'ruigi'
17
+ ```
18
+
19
+ And then execute:
20
+
21
+ $ bundle
22
+
23
+ Or install it yourself as:
24
+
25
+ $ gem install ruigi
26
+
27
+ ## Usage
28
+ 1. array of words -> Ruigi::Document
29
+ ```ruby
30
+ words = ["word1", "word2", ... , "wordN"]
31
+ document1 = Ruigi::Document.new(words)
32
+ ```
33
+
34
+ 2. Build a Model from an array of documents
35
+ ```ruby
36
+ corpus = [document1, document2, ... , documentN] # each element's type is Ruigi::Document.
37
+ model = Ruigi::Model.new(corpus)
38
+ ```
39
+ 3. You can get the feature vector of each document and calculate the similarity between documents.
40
+ ```ruby
41
+ model.feature_vector_of(0) # => return feature vector of 0th document
42
+ ```
43
+ etc...
44
+
45
+ ## Contributing
46
+
47
+ Bug reports and pull requests are welcome on GitHub at https://github.com/naoki-k/ruigi.
48
+ This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
49
+
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec) # registers a rake task named :spec
5
+
6
+ task :default => :spec # a bare `rake` invocation runs the :spec task
@@ -0,0 +1,7 @@
1
+ require 'ruigi/constraints'
2
+ require 'ruigi/document'
3
+ require 'ruigi/error'
4
+ require 'ruigi/model'
5
+ require 'ruigi/version'
6
+ require 'ruigi/word'
7
+ require 'nmatrix'
@@ -0,0 +1,12 @@
module Ruigi
  # Part-of-speech tags, as Japanese strings: noun, adjective, verb.
  # NOTE(review): nothing in the visible sources references this constant;
  # presumably callers use it to pick which tokens become words. Confirm.
  ENABLED_PARTS_OF_SPEECH = %w(
    名詞
    形容詞
    動詞
  ).map(&:freeze).freeze

  # Pattern for words that should be excluded.
  #
  # The previous literal had no x flag, so its newlines and indentation were
  # matched literally, and its doubled backslashes matched a literal backslash
  # instead of acting as anchors. It therefore could never match a real word.
  # The two lines are now alternatives in extended mode.
  # NOTE(review): alternation is the assumed intent of the two lines. Confirm.
  EXCEPT_REGEXP = %r(
    \A\p{hiragana}る\z |  # a single hiragana character followed by る
    \d+                   # any run of digits
  )x.freeze
end
@@ -0,0 +1,33 @@
# Forwardable is stdlib but not loaded by default; require it where it is used.
require 'forwardable'

module Ruigi
  # One document of a corpus, stored as distinct words with occurrence counts.
  #
  # After a Ruigi::Model assigns itself to #model, the document can produce a
  # unit-length TF-IDF feature vector over the model's word list.
  class Document
    extend Forwardable

    # model: the Ruigi::Model this document belongs to (nil until assigned).
    # words: Hash mapping each distinct word String to its Ruigi::Word.
    attr_accessor :model, :words

    # all_words => model.wordlist
    def_delegator :model, :wordlist, :all_words

    # @param words [Array<String>] the document's tokens, duplicates included
    # @raise [TypeError] if words is not an Array or holds a non-String
    def initialize(words)
      raise TypeError, 'words must be an Array' unless words.is_a?(Array)
      raise TypeError, 'words must contain only Strings' unless words.all? { |e| e.is_a?(String) }

      self.words = {}
      words.group_by { |e| e }.each do |name, occurrences|
        word = Ruigi::Word.new(name, occurrences.length)
        word.document = self
        self.words[name] = word
      end
    end

    # TF-IDF weights for every entry of the model's word list, in word-list
    # order, scaled to Euclidean length 1. Words absent from this document
    # contribute 0.0.
    #
    # The result is memoized; later changes to the model or to #words are not
    # reflected.
    #
    # @return [Array<Float>]
    # @raise [NoModelError] if no model has been assigned yet
    def feature_vector
      # Without this guard the delegator would call #wordlist on nil.
      raise NoModelError unless model

      @feature_vector ||=
        begin
          vector = all_words.map { |word| words.key?(word) ? words[word].tfidf : 0.0 }
          norm = Math.sqrt(vector.inject(0) { |sum, e| sum + e**2 })
          vector.map do |e|
            quot = e / norm
            # A zero norm gives 0.0 / 0.0 => NaN; report such entries as 0.0.
            quot.nan? ? 0.0 : quot
          end
        end
    end
  end
end
@@ -0,0 +1,4 @@
1
+ module Ruigi
2
+ class NoModelError < StandardError; end # raised by Word#df when the word's document has no model
3
+ class NoDocumentError < StandardError; end # raised by Word#tf when the word has no document
4
+ end
@@ -0,0 +1,32 @@
module Ruigi
  # A corpus of Ruigi::Document objects plus quantities derived from it: the
  # word list, the feature matrix and the matrix of pairwise inner products.
  class Model
    # The Array of documents handed to #initialize.
    attr_accessor :corpus

    # Stores the corpus and sets every document's #model to this instance.
    #
    # @param corpus [Array<Ruigi::Document>]
    # @raise [TypeError] if corpus is not an Array or holds a non-Document
    def initialize(corpus)
      raise TypeError, 'corpus must be an Array' unless corpus.is_a?(Array)
      unless corpus.all? { |e| e.is_a?(Ruigi::Document) }
        raise TypeError, 'corpus must contain only Ruigi::Document objects'
      end

      self.corpus = corpus
      corpus.each { |document| document.model = self }
    end

    # Feature vectors of all documents, one row per document, converted with
    # Array#to_nm (presumably supplied by the nmatrix gem; confirm).
    # Memoized: not recomputed if the corpus changes afterwards.
    def matrix
      @matrix ||= corpus.map(&:feature_vector).to_nm
    end

    # Feature vector of the document at +index+, converted with #to_nm.
    #
    # @raise [IndexError] if index is outside the corpus (previously this
    #   surfaced as a NoMethodError on nil)
    def feature_vector_of(index)
      corpus.fetch(index).feature_vector.to_nm
    end

    # Sorted list of the distinct words across all documents.
    # Memoized: not recomputed if the corpus changes afterwards.
    #
    # @return [Array<String>]
    def wordlist
      @wordlist ||= corpus.flat_map { |document| document.words.keys }.uniq.sort
    end

    # Product of #matrix with its transpose, i.e. the inner product of every
    # pair of document feature vectors.
    def inner_product_matrix
      matrix.dot(matrix.transpose)
    end
  end
end
@@ -0,0 +1,3 @@
1
+ module Ruigi
2
+ VERSION = '0.1.0'.freeze # gem version; ruigi.gemspec reads it as spec.version
3
+ end
@@ -0,0 +1,41 @@
# Forwardable is stdlib but not loaded by default; require it where it is used.
require 'forwardable'

module Ruigi
  # A distinct word inside one document, with its occurrence count and the
  # TF-IDF statistics derived from the owning document and that document's model.
  class Word
    extend Forwardable

    # document: the owning document (nil until assigned).
    # count:    number of occurrences of the word in that document.
    # name:     the word itself.
    attr_accessor :document, :count, :name

    # involved_model => document.model
    def_delegator :document, :model, :involved_model

    def initialize(name, count)
      self.name = name
      self.count = count
    end

    # Term frequency multiplied by inverse document frequency.
    #
    # @raise [NoDocumentError] if the word has no document
    # @raise [NoModelError] if the document has no model
    def tfidf
      tf * idf
    end

    # Term frequency: this word's count divided by the sum of the counts of
    # all words in the owning document.
    #
    # @return [Float]
    # @raise [NoDocumentError] if the word has no document
    def tf
      raise NoDocumentError unless document

      document_length = document.words.inject(0) { |sum, (_name, word)| sum + word.count }
      count.to_f / document_length
    end

    # Document frequency: how many documents of the model's corpus contain
    # this word.
    #
    # @return [Integer]
    # @raise [NoDocumentError] if the word has no document (previously this
    #   surfaced as a NoMethodError on nil from the delegator)
    # @raise [NoModelError] if the document has no model
    def df
      raise NoDocumentError unless document
      raise NoModelError unless involved_model

      involved_model.corpus.count { |doc| doc.words.key?(name) }
    end

    # Inverse document frequency: log base 2 of corpus size over #df.
    def idf
      df2idf(df)
    end

    private

    # log_base(corpus size / df) + add
    def df2idf(df, base = 2.0, add = 0.0)
      Math.log(involved_model.corpus.size.to_f / df, base) + add
    end
  end
end
@@ -0,0 +1,33 @@
# coding: utf-8
# Put ./lib on the load path so the version constant can be required below.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'ruigi/version'

Gem::Specification.new do |spec|
  spec.name          = "ruigi"
  spec.version       = Ruigi::VERSION
  spec.authors       = ["naoki-k"]
  spec.email         = ["k12naoki@gmail.com"]

  spec.summary       = %q{calculate similarity between documents}
  spec.description   = %q{calculate similarity between documents}
  spec.homepage      = "https://github.com/naoki-k/ruigi"

  # Restrict `gem push` to RubyGems.org only. Change 'allowed_push_host' to
  # push to a different single host, or delete this section to allow any host.
  if spec.respond_to?(:metadata)
    spec.metadata['allowed_push_host'] = "https://rubygems.org"
  else
    raise "RubyGems 2.0 or newer is required to protect against public gem pushes."
  end

  # Package every git-tracked file except tests, specs and features.
  spec.files         = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  # lib/ruigi.rb requires 'nmatrix' at load time, so it has to be installed
  # together with the gem: a runtime dependency, not a development one.
  spec.add_dependency "nmatrix"

  spec.add_development_dependency "natto", "~> 1.1"
  spec.add_development_dependency "bundler", "~> 1.12"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec", "~> 3.0"
end
@@ -0,0 +1,7 @@
1
+ box: ruby
2
+ build:
3
+ steps:
4
+ - bundle-install
5
+ - script:
6
+ name: rspec
7
+ code: bundle exec rspec
metadata ADDED
@@ -0,0 +1,130 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ruigi
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - naoki-k
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-10-22 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nmatrix
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: natto
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.1'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.1'
41
+ - !ruby/object:Gem::Dependency
42
+ name: bundler
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.12'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '1.12'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rake
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '10.0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '10.0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rspec
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '3.0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '3.0'
83
+ description: calculate similarity between documents
84
+ email:
85
+ - k12naoki@gmail.com
86
+ executables: []
87
+ extensions: []
88
+ extra_rdoc_files: []
89
+ files:
90
+ - ".gitignore"
91
+ - ".rspec"
92
+ - ".travis.yml"
93
+ - CODE_OF_CONDUCT.md
94
+ - Gemfile
95
+ - README.md
96
+ - Rakefile
97
+ - lib/ruigi.rb
98
+ - lib/ruigi/constraints.rb
99
+ - lib/ruigi/document.rb
100
+ - lib/ruigi/error.rb
101
+ - lib/ruigi/model.rb
102
+ - lib/ruigi/version.rb
103
+ - lib/ruigi/word.rb
104
+ - ruigi.gemspec
105
+ - wercker.yml
106
+ homepage: https://github.com/naoki-k/ruigi
107
+ licenses: []
108
+ metadata:
109
+ allowed_push_host: https://rubygems.org
110
+ post_install_message:
111
+ rdoc_options: []
112
+ require_paths:
113
+ - lib
114
+ required_ruby_version: !ruby/object:Gem::Requirement
115
+ requirements:
116
+ - - ">="
117
+ - !ruby/object:Gem::Version
118
+ version: '0'
119
+ required_rubygems_version: !ruby/object:Gem::Requirement
120
+ requirements:
121
+ - - ">="
122
+ - !ruby/object:Gem::Version
123
+ version: '0'
124
+ requirements: []
125
+ rubyforge_project:
126
+ rubygems_version: 2.5.1
127
+ signing_key:
128
+ specification_version: 4
129
+ summary: calculate similarity between documents
130
+ test_files: []