linkterm 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History.txt ADDED
@@ -0,0 +1,4 @@
1
+ == 0.0.1 2007-07-08
2
+
3
+ * 1 major enhancement:
4
+ * Initial release
data/License.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2007 Linkterm
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Manifest.txt ADDED
@@ -0,0 +1,16 @@
1
+ History.txt
2
+ License.txt
3
+ Manifest.txt
4
+ README.txt
5
+ Rakefile
6
+ lib/linkterm.rb
7
+ lib/linkterm/version.rb
8
+ scripts/txt2html
9
+ setup.rb
10
+ test/test_helper.rb
11
+ test/test_linkterm.rb
12
+ website/index.html
13
+ website/index.txt
14
+ website/javascripts/rounded_corners_lite.inc.js
15
+ website/stylesheets/screen.css
16
+ website/template.rhtml
data/README.txt ADDED
@@ -0,0 +1,24 @@
1
+ README for linkterm
2
+ ===================
3
+
4
+ == INSTALL
5
+ ruby-mecabを使っているため、以下のものを事前にインストールする必要があります。
6
+
7
+ * mecab
8
+ * rb-mecab
9
+
10
+ これらのインストールが完了すると、Rubyで
11
+ require 'MeCab'できるようになります。
12
+
13
+ OS Xでのインストール方法は
14
+ http://ujihisa.nowa.jp/entry/0da2db2747
15
+ にあります。
16
+
17
+ == How to use
18
+ コマンドラインインタフェースを用意しています。
19
+ example/example.rbを実行して、適当なキーワードを入力してください。
20
+
21
+ example.rbはexample/docs以下のファイルから知識を得、
22
+ その知識に基づいて入力されたキーワードに対応する
23
+ ルールを出力します。
24
+
data/Rakefile ADDED
@@ -0,0 +1,123 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+ require 'rake/clean'
4
+ require 'rake/testtask'
5
+ require 'rake/packagetask'
6
+ require 'rake/gempackagetask'
7
+ require 'rake/rdoctask'
8
+ require 'rake/contrib/rubyforgepublisher'
9
+ require 'fileutils'
10
+ require 'hoe'
11
+
# Make the FileUtils helpers callable directly from the tasks in this Rakefile.
include FileUtils

# Load Linkterm::VERSION from the library that ships next to this Rakefile.
require File.join(File.dirname(__FILE__), 'lib', 'linkterm', 'version')

# Gem metadata.
AUTHOR      = 'Linkterm'           # may also be an array of author names
EMAIL       = 'ujihisa gmail com'
DESCRIPTION = 'description of gem'
GEM_NAME    = 'linkterm'           # the name people type to install the gem
19
+
# YAML is required explicitly so that a missing constant is never mistaken
# for a missing configuration file by the rescue below.
require 'yaml'

# Path of the RubyForge user configuration read by rubyforge_username.
@config_file = "~/.rubyforge/user-config.yml"
# Parsed configuration; stays nil until rubyforge_username loads it.
@config = nil

# Returns the "username" entry of the RubyForge user configuration.
#
# The configuration file is read and parsed on the first call only; the
# username is memoized afterwards. When the file cannot be read or parsed,
# an explanatory message is printed and the process exits with status 1.
def rubyforge_username
  unless @config
    begin
      @config = YAML.load(File.read(File.expand_path(@config_file)))
    rescue
      puts <<-EOS
ERROR: No rubyforge config file found: #{@config_file}
Run 'rubyforge setup' to prepare your env for access to Rubyforge
 - See http://newgem.rubyforge.org/rubyforge.html for more details
      EOS
      # Non-zero status so callers and shells can tell the lookup failed.
      exit 1
    end
  end
  @rubyforge_username ||= @config["username"]
end
37
+
# RubyForge coordinates of the project.
RUBYFORGE_PROJECT = 'linkterm' # the unix name of the project
HOMEPATH = "http://#{RUBYFORGE_PROJECT}.rubyforge.org"
DOWNLOAD_PATH = "http://rubyforge.org/projects/#{RUBYFORGE_PROJECT}"

NAME = 'linkterm'
# Optional revision suffix appended to the version; nil leaves it off.
# To take it from Subversion instead, assign:
#   `svn info`.each {|line| if line =~ /^Revision:/ then k,v = line.split(': '); break v.chomp; else next; end} rescue nil
REV = nil
VERS = "#{Linkterm::VERSION::STRING}#{REV ? ".#{REV}" : ''}"
CLEAN.include(['**/.*.sw?', '*.gem', '.config', '**/.DS_Store'])
RDOC_OPTS = [
  '--quiet',
  '--title', 'linkterm documentation',
  '--opname', 'index.html',
  '--line-numbers',
  '--main', 'README',
  '--inline-source'
]

# Hide hoe itself from the dependency list reported by Hoe.
class Hoe
  def extra_deps
    @extra_deps.reject { |dep| Array(dep).first == 'hoe' }
  end
end

# Generate all the Rake tasks.
# Run 'rake -T' from the gem root directory to list them.
hoe = Hoe.new(GEM_NAME, VERS) do |spec|
  spec.author = AUTHOR
  spec.description = DESCRIPTION
  spec.email = EMAIL
  spec.summary = DESCRIPTION
  spec.url = HOMEPATH
  spec.rubyforge_name = RUBYFORGE_PROJECT if RUBYFORGE_PROJECT
  spec.test_globs = ['test/**/test_*.rb']
  spec.clean_globs |= CLEAN # file patterns deleted by the clean task

  # == Optional
  spec.changes = spec.paragraphs_of('History.txt', 0..1).join("\n\n")
  # spec.extra_deps = []  # rubygem dependencies as [name, version] pairs, e.g. [ ['active_support', '>= 1.3.1'] ]
  # spec.spec_extras = {} # extra values to set in the gemspec
end

CHANGES = hoe.paragraphs_of('History.txt', 0..1).join("\n\n")
PATH =
  if RUBYFORGE_PROJECT == GEM_NAME
    RUBYFORGE_PROJECT
  else
    "#{RUBYFORGE_PROJECT}/#{GEM_NAME}"
  end
hoe.remote_rdoc_dir = File.join(PATH.gsub(/^#{RUBYFORGE_PROJECT}\/?/, ''), 'rdoc')

desc 'Generate website files'
task :website_generate do
  Dir['website/**/*.txt'].each do |txt|
    html = txt.gsub(/txt$/, 'html')
    sh " ruby scripts/txt2html #{txt} > #{html} "
  end
end

desc 'Upload website files to rubyforge'
task :website_upload do
  host = "#{rubyforge_username}@rubyforge.org"
  remote_dir = "/var/www/gforge-projects/#{PATH}/"
  local_dir = 'website'
  sh "rsync -aCv #{local_dir}/ #{host}:#{remote_dir}"
end

desc 'Generate and upload website files'
task :website => [:website_generate, :website_upload, :publish_docs]

desc 'Release the website and new gem version'
task :deploy => [:check_version, :website, :release] do
  svn_root = "svn+ssh://#{rubyforge_username}@rubyforge.org/var/svn/#{PATH}"
  puts "Remember to create SVN tag:"
  puts "svn copy #{svn_root}/trunk #{svn_root}/tags/REL-#{VERS} "
  puts "Suggested comment:"
  puts "Tagging release #{CHANGES}"
end

desc 'Runs tasks website_generate and install_gem as a local deployment of the gem'
task :local_deploy => [:website_generate, :install_gem]

# Stops rake unless VERSION=x.y.z was passed and matches the library version.
task :check_version do
  requested = ENV['VERSION']
  if requested.nil?
    puts 'Must pass a VERSION=x.y.z release version'
    exit
  end
  if requested != VERS
    puts "Please update your version.rb to match the release version, currently #{VERS}"
    exit
  end
end
122
+
123
+
@@ -0,0 +1,9 @@
# Version information for the linkterm gem.
module Linkterm #:nodoc:
  module VERSION #:nodoc:
    MAJOR, MINOR, TINY = 0, 0, 1

    # Dotted version string built from the three components above.
    STRING = "#{MAJOR}.#{MINOR}.#{TINY}"
  end
end
data/lib/linkterm.rb ADDED
@@ -0,0 +1,212 @@
1
+ $KCODE = 'u'
2
+ require 'MeCab'
3
+ require 'pp'
4
+ require 'set'
5
+ require 'mathn'
6
+ require 'linktermlisc'
7
+ require 'nkf'
8
+
# Convenience extensions to Array used by the rule induction code.
class Array
  # Returns a Set built from the elements of this array.
  def to_set
    Set.new self
  end

  # Evaluates the list comprehension below through String#lisc_do (from
  # 'linktermlisc'); as written it asks for every [x, y] with x and y taken
  # from this array and x != y.
  def parm
    '[ [x,y] | x <- self, y <- self, x != y]'.lisc_do binding
  end

  # Builds a Hash from an array of [key, value] pairs (a flat
  # [key, value, key, value, ...] array is accepted as well).
  #
  # Only one level is flattened, so a value that is itself an array is kept
  # intact instead of being spread over several keys.
  #
  # NOTE(review): Ruby uses #to_hash for implicit Hash conversion, so defining
  # it on Array can change how arrays behave where a Hash is expected.
  def to_hash
    Hash[*flatten(1)]
  end
end
22
+
module Enumerable
  # Groups the elements by the value the block returns for them and returns
  # the groups as an array of arrays (the grouping keys are discarded).
  def partitions
    buckets = {}
    each do |item|
      key = yield(item)
      buckets[key] = [] unless buckets[key]
      buckets[key] << item
    end
    buckets.map { |_key, members| members }
  end
end
29
+
module MeCab
  # Makes a node usable as an Enumerable over itself and its successors.
  class Node
    include Enumerable

    # Returns the first comma-separated field of the node's feature string.
    def category
      feature.split(/,/)[0]
    end

    # Yields this node and then every node reachable through #next.
    #
    # The chain is walked with a loop rather than one recursive call per
    # node, so a long chain cannot exhaust the call stack.
    def each(&b)
      node = self
      while node
        b[node]
        node = node.next
      end
    end
  end
end
44
+
# Prints progress lines of the form "<message> <current>/<max>".
# example:
#   cs = %w(the quick silver)
#   counter = Counter.new(cs.size)
#   cs.each do |c|
#     counter.display!
#     puts c
#   end
class Counter
  # max     - total number of steps, shown after the slash
  # message - optional label, printed as "[message]: " before the count
  # output  - object receiving the progress lines through #puts
  def initialize(max, message = nil, output = STDOUT)
    @i = 0
    @max = max
    @message = message.nil? ? '' : "[#{message}]: "
    # Kept so that display! writes to the stream the caller asked for.
    @output = output
  end

  # Advances the counter by one and prints the new progress line.
  def display!
    @output.puts "#{@message} #{@i += 1}/#{@max}"
  end
end
64
+
65
+ #######################################################################
66
+ module Linkterm
# Reads the documents of a directory and derives term-association rules
# from the nouns that occur together in their paragraphs.
class Base
  # options[:doc_dir] - directory whose entries are used as documents
  #                     (defaults to 'docs').
  def initialize(options = {})
    @doc_dir = options[:doc_dir] || 'docs'

    # Dir.entries does not leave a directory handle open; entries made up
    # only of dots ('.', '..', ...) are skipped.
    names = Dir.entries(@doc_dir).reject { |f| /^\.+$/ =~ f }
    @documents = Documents.new(names.map { |f| "#{@doc_dir}/" + f })
  end

  # Returns log(number of documents / number of documents containing +word+).
  #
  # The per-term document counts are computed on the first call and cached.
  # A word found in no document has a count of 0, so the division raises
  # ZeroDivisionError for it.
  def idf(word)
    @documents_count ||= document_frequencies
    Math.log( @documents.filenames.length / @documents_count[word] )
  end

  # Returns the induced rules as an array of hashes
  # {:a1 => terms, :a2 => terms, :spt => support}, keeping only rules whose
  # support is at least one half. Returns [] when no paragraph yields terms.
  def rule_table
    table = []
    counter = Counter.new(@documents.filenames.size, 'trans to table')
    @documents.each do |document|
      counter.display!
      document.each do |paragraph|
        word_count = Hash.new 0
        paragraph.each do |node|
          word_count[node.surface.downcase] += 1 if term?(node, 2)
        end
        # keep only the three terms with the highest TF-IDF score
        top = word_count.sort_by { |term, count| count * idf(term) }.reverse[0...3]
        terms = top.map { |term, _| term }.sort
        table << terms unless terms.empty?
      end
    end

    # Without any row there is nothing to induce rules from.
    return [] if table.empty?

    # rule induction
    # NOTE(review): the groups come back in the order their category is first
    # met within a row, so a given column is not guaranteed to hold the same
    # category (:eng / :long / :short) in every row -- confirm this is intended.
    table = table.map { |line|
      arr = line.partitions { |term|
        if /^[a-z_]+$/ =~ term
          :eng
        elsif term.length > 10
          :long
        else
          :short
        end
      }
      arr << [] until arr.size >= 3
      arr
    }

    array = (0...table.first.length).to_a.parm
    counter = Counter.new(array.length, 'induce rule')
    # The comprehension string is evaluated against this binding, so the
    # local names table, c1 and c2 must stay exactly as they are.
    rules = array.map { |c1, c2|
      counter.display!
      pattern = '[ [x,y] | x <- table.map{|u| u[c1] }.to_set, y <- table.map{|u| u[c2] }.to_set, !x.empty?, !y.empty? ]'.lisc_do binding
      pattern.map { |a1, a2|
        # share of the rows holding a1 in column c1 that also hold a2 in
        # column c2; with 'mathn' loaded the division yields an exact fraction
        spt = table.select { |u| u[c1] == a1 and u[c2] == a2 }.length / table.select { |u| u[c1] == a1 }.length
        {:a1 => a1, :a2 => a2, :spt => spt}
      }
    }.flatten.select { |r| r[:spt] >= 1/2 }

    rules
  end

  private

  # Counts, for every term, the number of documents it occurs in.
  def document_frequencies
    frequencies = Hash.new(0)
    counter = Counter.new(@documents.filenames.length, 'idf')
    @documents.each do |document|
      counter.display!
      seen = Set.new []
      document.each do |paragraph|
        paragraph.each do |node|
          seen << node.surface.downcase if term?(node, 1)
        end
      end
      seen.each { |term| frequencies[term] += 1 }
    end
    frequencies
  end

  # True when +node+ is a noun whose surface is longer than +min_length+ and
  # consists neither of non-word characters only nor of digits only.
  def term?(node, min_length)
    surface = node.surface
    node.category == '名詞' &&
      surface.length > min_length &&
      /^\W+$/ !~ surface &&
      /^\d+$/ !~ surface
  end
end
142
+
# Enumerable collection of documents backed by a list of file names.
class Documents
  include Enumerable

  attr_reader :filenames

  def initialize(filenames)
    @filenames = filenames
  end

  # Reads each selected file and yields it wrapped in a Document.
  # range - which part of the file name list to visit (all of it by default).
  def each(range = 0...@filenames.size)
    filenames[range].each do |path|
      text = File.open(path, 'r') { |io| io.read }
      yield Document.new(text)
    end
  end
end
158
+
# A document's text, enumerable as Paragraph objects.
class Document
  include Enumerable

  def initialize(text)
    @text = text
  end

  # Replaces tags and the text "nbsp" with spaces, splits the text at blank
  # lines and yields one Paragraph per resulting chunk.
  def each
    stripped = @text.gsub(/<.*?>|nbsp/, ' ')
    pieces = stripped.split(/(\r?\n\s*){2}/)

    # Pieces of at most 150 characters are appended to the previous chunk.
    merged = ['']
    pieces.each do |piece|
      if piece.size > 150
        merged << piece
      else
        merged.last << ' ' << piece
      end
    end

    merged.each { |paragraph| yield Paragraph.new(paragraph) }
  end
end
179
+
# A paragraph of text, enumerable as the MeCab nodes of its analysis.
class Paragraph
  include Enumerable

  def initialize(text)
    @text = text
  end

  # Converts the text with NKF ('-w'), parses it with a MeCab tagger created
  # with "-Ochasen" and yields every node of the result.
  def each
    tagger = MeCab::Tagger.new("-Ochasen")
    converted = NKF.nkf('-w', @text)
    head = tagger.parseToNode(converted)
    head.each { |node| yield node }
  end
end
194
+ end
195
+
196
+
# CUI: learn the rules, print them, then answer keyword queries read from
# standard input until the input ends.
linkterm = Linkterm::Base.new
rule_table = linkterm.rule_table
pp rule_table
puts "input?"
loop do
  line = gets
  break if line.nil? # end of input: leave instead of calling a method on nil

  # chomp strips only a trailing line terminator, so a final line without a
  # newline keeps its last character.
  str = NKF.nkf('-w', line.chomp)
  # The keyword is matched literally as a prefix; escaping keeps characters
  # such as "(" or "*" from being read as regular expression syntax.
  prefix = /^#{Regexp.escape(str)}/
  rule_table.select {|record|
    record[:a1].any? {|r| prefix =~ r } ||
      record[:a2].any? {|r| prefix =~ r }
  }.each {|record|
    puts "[#{record[:a1].join(', ')}]\t\t-> [#{record[:a2].join(', ')}]"
  }
end
211
+
212
+
data/scripts/txt2html ADDED
@@ -0,0 +1,67 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rubygems'
4
+ require 'redcloth'
5
+ require 'syntax/convertors/html'
6
+ require 'erb'
7
+ require File.dirname(__FILE__) + '/../lib/linkterm/version.rb'
8
+
9
+ version = Linkterm::VERSION::STRING
10
+ download = 'http://rubyforge.org/projects/linkterm'
11
+
# Integer is reopened rather than Fixnum: Fixnum inherits from Integer on old
# Rubies and no longer exists on current ones, so this covers both.
class Integer
  # Returns the English ordinal suffix for this number:
  # 'st', 'nd', 'rd' or 'th'.
  def ordinal
    # teens
    return 'th' if (10..19).include?(self % 100)
    # others
    case self % 10
    when 1 then 'st'
    when 2 then 'nd'
    when 3 then 'rd'
    else 'th'
    end
  end
end
25
+
class Time
  # Formats the date as "<day><ordinal suffix> <month name> <year>".
  def pretty
    day = mday
    month_name = strftime('%B')
    "#{day}#{day.ordinal} #{month_name} #{year}"
  end
end
31
+
# Highlights +source+ with the HTML convertor for +syntax+ and strips a
# <pre> at the start of a line and a </pre> at the end of a line.
def convert_syntax(syntax, source)
  convertor = Syntax::Convertors::HTML.for_syntax(syntax)
  html = convertor.convert(source)
  html.gsub(%r!^<pre>|</pre>$!, '')
end
35
+
# Usage: txt2html source.txt [template.rhtml] > output.html
# Without an explicit template the site template under website/ is used.
if ARGV.length >= 1
  src, template = ARGV
  template ||= File.dirname(__FILE__) + '/../website/template.rhtml'

else
  puts("Usage: #{File.split($0).last} source.txt [template.rhtml] > output.html")
  exit!
end

# File.read closes the template file once its content has been read.
template = ERB.new(File.read(template))

# The template is rendered against this scope's binding, so the local names
# below (title, body, created, modified, ...) must stay as they are.
title = nil
body = nil
File.open(src) do |fsrc|
  title_text = fsrc.readline
  body_text = fsrc.read
  syntax_items = []
  # Swap every <pre>/<code> element carrying a syntax attribute for a
  # placeholder; \1 makes the closing tag match the opening one.
  body_text.gsub!(%r!<(pre|code)[^>]*?syntax=['"]([^'"]+)[^>]*>(.*?)</\1>!m){
    ident = syntax_items.length
    element, syntax, source = $1, $2, $3
    syntax_items << "<#{element} class='syntax'>#{convert_syntax(syntax, source)}</#{element}>"
    "syntax-temp-#{ident}"
  }
  title = RedCloth.new(title_text).to_html.gsub(%r!<.*?>!,'').strip
  body = RedCloth.new(body_text).to_html
  # Put the highlighted snippets back; \d+ captures the placeholder's index.
  body.gsub!(%r!(?:<pre><code>)?syntax-temp-(\d+)(?:</code></pre>)?!){ syntax_items[$1.to_i] }
end
stat = File.stat(src)
created = stat.ctime
modified = stat.mtime

$stdout << template.result(binding)