daijisen 0.0.5 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/LICENSE +20 -0
- data/README.md +6 -0
- data/Rakefile +54 -26
- data/VERSION +1 -0
- data/lib/daijisen.rb +15 -19
- data/test/helper.rb +10 -0
- data/test/test_daijisen.rb +10 -0
- metadata +58 -79
- data/Manifest.txt +0 -11
- data/PostInstall.txt +0 -7
- data/README.rdoc +0 -48
data/.document
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2009 Kelly
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
data/Rakefile
CHANGED
@@ -1,26 +1,54 @@
|
|
1
|
-
require 'rubygems'
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
require '
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake'
|
3
|
+
|
4
|
+
begin
|
5
|
+
require 'jeweler'
|
6
|
+
Jeweler::Tasks.new do |gem|
|
7
|
+
gem.name = "daijisen"
|
8
|
+
gem.summary = "A simple scraper tool for Japanese to Japanese definitions"
|
9
|
+
gem.description = "A simple scraper tool for the Yahoo Japanese to Japanese Daijisen Dictionary"
|
10
|
+
gem.email = "defaultstring@gmail.com"
|
11
|
+
gem.homepage = "http://github.com/kellydunn/daijisen"
|
12
|
+
gem.authors = ["Kelly"]
|
13
|
+
gem.add_development_dependency "thoughtbot-shoulda", ">= 0"
|
14
|
+
gem.add_development_dependency "nokogiri"
|
15
|
+
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
16
|
+
end
|
17
|
+
Jeweler::GemcutterTasks.new
|
18
|
+
rescue LoadError
|
19
|
+
puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
|
20
|
+
end
|
21
|
+
|
22
|
+
require 'rake/testtask'
|
23
|
+
Rake::TestTask.new(:test) do |test|
|
24
|
+
test.libs << 'lib' << 'test'
|
25
|
+
test.pattern = 'test/**/test_*.rb'
|
26
|
+
test.verbose = true
|
27
|
+
end
|
28
|
+
|
29
|
+
begin
|
30
|
+
require 'rcov/rcovtask'
|
31
|
+
Rcov::RcovTask.new do |test|
|
32
|
+
test.libs << 'test'
|
33
|
+
test.pattern = 'test/**/test_*.rb'
|
34
|
+
test.verbose = true
|
35
|
+
end
|
36
|
+
rescue LoadError
|
37
|
+
task :rcov do
|
38
|
+
abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
task :test => :check_dependencies
|
43
|
+
|
44
|
+
task :default => :test
|
45
|
+
|
46
|
+
require 'rdoc/task'
|
47
|
+
Rake::RDocTask.new do |rdoc|
|
48
|
+
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
49
|
+
|
50
|
+
rdoc.rdoc_dir = 'rdoc'
|
51
|
+
rdoc.title = "daijisen #{version}"
|
52
|
+
rdoc.rdoc_files.include('README*')
|
53
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
54
|
+
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.1.1
|
data/lib/daijisen.rb
CHANGED
@@ -1,52 +1,48 @@
|
|
1
|
+
# TODO determine if I need these.
|
1
2
|
$:.unshift(File.dirname(__FILE__)) unless
|
2
3
|
$:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
|
3
4
|
|
4
|
-
=begin
|
5
|
-
Yahoo Daijisen Japanese Dictionary Scraper
|
6
|
-
Author: Kelly Dunn
|
7
|
-
=end
|
8
|
-
|
9
5
|
module Daijisen
|
10
|
-
VERSION = '0.0.5'
|
11
6
|
require 'rubygems'
|
12
7
|
require 'nokogiri'
|
13
8
|
require 'open-uri'
|
14
9
|
require 'cgi'
|
15
10
|
|
16
|
-
# Query Object.
|
17
|
-
# Effectively scrapes The Yahoo Daijisen Dictionary
|
18
|
-
# And finds definitions of the Japanese String passed in
|
19
|
-
#
|
20
11
|
# TODO: Incorporate SHIFT_JS encoding. Only UTF-8 works for now.
|
21
12
|
class Query
|
22
|
-
attr_accessor :defs, :query
|
13
|
+
attr_accessor :defs, :query, :url
|
23
14
|
|
24
15
|
def initialize(query)
|
25
16
|
@query = query
|
26
17
|
@defs = []
|
18
|
+
@url = ""
|
27
19
|
get_raw_html()
|
28
20
|
end
|
29
21
|
|
30
|
-
# Scraping function.
|
31
22
|
def get_raw_html()
|
32
|
-
url = "http://dic.yahoo.co.jp/search?stype=0&ei=UTF-8&dtype=2&p=" + CGI::escape(@query)
|
23
|
+
@url = "http://dic.yahoo.co.jp/search?stype=0&ei=UTF-8&dtype=2&p=" + CGI::escape(@query)
|
33
24
|
html = Nokogiri::HTML(open(url))
|
34
|
-
|
25
|
+
|
26
|
+
# TODO Configurable and diggable
|
27
|
+
html.css("#DSm1 ol > li").each do |daiji_def|
|
35
28
|
@defs.push(Definition.new(daiji_def))
|
36
29
|
end
|
37
30
|
end
|
38
31
|
|
39
|
-
|
32
|
+
def each
|
33
|
+
@defs.each do |d|
|
34
|
+
yield d
|
35
|
+
end
|
36
|
+
end
|
40
37
|
end
|
41
38
|
|
42
|
-
# For delicious Ruby Modularity, Definitions will be OOPified.
|
43
39
|
class Definition
|
44
40
|
attr_accessor :link, :example, :reading
|
45
41
|
|
46
42
|
def initialize(def_html)
|
47
|
-
@link = def_html.css("a")[0]['href']
|
48
|
-
@reading = def_html.css("a")[0].content
|
49
|
-
@example = def_html.css("
|
43
|
+
@link = def_html.css("h3 a")[0]['href']
|
44
|
+
@reading = def_html.css("h3 a")[0].content
|
45
|
+
@example = def_html.css("div")[0].content
|
50
46
|
end
|
51
47
|
end
|
52
48
|
end
|
data/test/helper.rb
ADDED
data/test/test_daijisen.rb
CHANGED
@@ -1,3 +1,12 @@
|
|
1
|
+
<<<<<<< HEAD
|
2
|
+
require 'helper'
|
3
|
+
|
4
|
+
class TestDaijisen < Test::Unit::TestCase
|
5
|
+
should "probably rename this file and start testing for real" do
|
6
|
+
flunk "hey buddy, you should probably rename this file and start testing for real"
|
7
|
+
end
|
8
|
+
end
|
9
|
+
=======
|
1
10
|
require File.dirname(__FILE__) + '/test_helper.rb'
|
2
11
|
|
3
12
|
class TestDaijisen < Test::Unit::TestCase
|
@@ -9,3 +18,4 @@ class TestDaijisen < Test::Unit::TestCase
|
|
9
18
|
assert true
|
10
19
|
end
|
11
20
|
end
|
21
|
+
>>>>>>> origin/master
|
metadata
CHANGED
@@ -1,104 +1,83 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: daijisen
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
|
6
|
-
- 0
|
7
|
-
- 0
|
8
|
-
- 5
|
9
|
-
version: 0.0.5
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.1
|
5
|
+
prerelease:
|
10
6
|
platform: ruby
|
11
|
-
authors:
|
12
|
-
- Kelly
|
7
|
+
authors:
|
8
|
+
- Kelly
|
13
9
|
autorequire:
|
14
10
|
bindir: bin
|
15
11
|
cert_chain: []
|
16
|
-
|
17
|
-
date: 2010-05-21 00:00:00 -07:00
|
12
|
+
date: 2011-10-02 00:00:00.000000000 -07:00
|
18
13
|
default_executable:
|
19
|
-
dependencies:
|
20
|
-
- !ruby/object:Gem::Dependency
|
21
|
-
name:
|
22
|
-
|
23
|
-
|
24
|
-
requirements:
|
25
|
-
- -
|
26
|
-
- !ruby/object:Gem::Version
|
27
|
-
|
28
|
-
- 2
|
29
|
-
- 0
|
30
|
-
- 4
|
31
|
-
version: 2.0.4
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: thoughtbot-shoulda
|
17
|
+
requirement: &80605390 !ruby/object:Gem::Requirement
|
18
|
+
none: false
|
19
|
+
requirements:
|
20
|
+
- - ! '>='
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: '0'
|
32
23
|
type: :development
|
33
|
-
version_requirements: *id001
|
34
|
-
- !ruby/object:Gem::Dependency
|
35
|
-
name: hoe
|
36
24
|
prerelease: false
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
version:
|
25
|
+
version_requirements: *80605390
|
26
|
+
- !ruby/object:Gem::Dependency
|
27
|
+
name: nokogiri
|
28
|
+
requirement: &80602970 !ruby/object:Gem::Requirement
|
29
|
+
none: false
|
30
|
+
requirements:
|
31
|
+
- - ! '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
46
34
|
type: :development
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: *80602970
|
37
|
+
description: A simple scraper tool for the Yahoo Japanese to Japanese Daijisen Dictionary
|
38
|
+
email: defaultstring@gmail.com
|
51
39
|
executables: []
|
52
|
-
|
53
40
|
extensions: []
|
54
|
-
|
55
|
-
|
41
|
+
extra_rdoc_files:
|
42
|
+
- LICENSE
|
43
|
+
- README.md
|
44
|
+
files:
|
45
|
+
- .document
|
56
46
|
- History.txt
|
57
|
-
-
|
58
|
-
-
|
59
|
-
files:
|
60
|
-
- History.txt
|
61
|
-
- Manifest.txt
|
62
|
-
- PostInstall.txt
|
63
|
-
- README.rdoc
|
47
|
+
- LICENSE
|
48
|
+
- README.md
|
64
49
|
- Rakefile
|
50
|
+
- VERSION
|
65
51
|
- lib/daijisen.rb
|
66
52
|
- script/console
|
67
53
|
- script/destroy
|
68
54
|
- script/generate
|
55
|
+
- test/helper.rb
|
69
56
|
- test/test_daijisen.rb
|
70
57
|
- test/test_helper.rb
|
71
58
|
has_rdoc: true
|
72
|
-
homepage: http://github.com
|
59
|
+
homepage: http://github.com/kellydunn/daijisen
|
73
60
|
licenses: []
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
- --main
|
78
|
-
- README.rdoc
|
79
|
-
require_paths:
|
61
|
+
post_install_message:
|
62
|
+
rdoc_options: []
|
63
|
+
require_paths:
|
80
64
|
- lib
|
81
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
requirements:
|
90
|
-
- -
|
91
|
-
- !ruby/object:Gem::Version
|
92
|
-
|
93
|
-
- 0
|
94
|
-
version: "0"
|
65
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
66
|
+
none: false
|
67
|
+
requirements:
|
68
|
+
- - ! '>='
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
version: '0'
|
71
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
72
|
+
none: false
|
73
|
+
requirements:
|
74
|
+
- - ! '>='
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: '0'
|
95
77
|
requirements: []
|
96
|
-
|
97
|
-
|
98
|
-
rubygems_version: 1.3.6
|
78
|
+
rubyforge_project:
|
79
|
+
rubygems_version: 1.6.2
|
99
80
|
signing_key:
|
100
81
|
specification_version: 3
|
101
|
-
summary:
|
102
|
-
test_files:
|
103
|
-
- test/test_daijisen.rb
|
104
|
-
- test/test_helper.rb
|
82
|
+
summary: A simple scraper tool for Japanese to Japanese definitions
|
83
|
+
test_files: []
|
data/Manifest.txt
DELETED
data/PostInstall.txt
DELETED
data/README.rdoc
DELETED
@@ -1,48 +0,0 @@
|
|
1
|
-
= daijisen
|
2
|
-
|
3
|
-
* http://github.com/#{github_username}/#{project_name}
|
4
|
-
|
5
|
-
== DESCRIPTION:
|
6
|
-
|
7
|
-
FIX (describe your package)
|
8
|
-
|
9
|
-
== FEATURES/PROBLEMS:
|
10
|
-
|
11
|
-
* FIX (list of features or problems)
|
12
|
-
|
13
|
-
== SYNOPSIS:
|
14
|
-
|
15
|
-
FIX (code sample of usage)
|
16
|
-
|
17
|
-
== REQUIREMENTS:
|
18
|
-
|
19
|
-
* FIX (list of requirements)
|
20
|
-
|
21
|
-
== INSTALL:
|
22
|
-
|
23
|
-
* FIX (sudo gem install, anything else)
|
24
|
-
|
25
|
-
== LICENSE:
|
26
|
-
|
27
|
-
(The MIT License)
|
28
|
-
|
29
|
-
Copyright (c) 2010 FIXME full name
|
30
|
-
|
31
|
-
Permission is hereby granted, free of charge, to any person obtaining
|
32
|
-
a copy of this software and associated documentation files (the
|
33
|
-
'Software'), to deal in the Software without restriction, including
|
34
|
-
without limitation the rights to use, copy, modify, merge, publish,
|
35
|
-
distribute, sublicense, and/or sell copies of the Software, and to
|
36
|
-
permit persons to whom the Software is furnished to do so, subject to
|
37
|
-
the following conditions:
|
38
|
-
|
39
|
-
The above copyright notice and this permission notice shall be
|
40
|
-
included in all copies or substantial portions of the Software.
|
41
|
-
|
42
|
-
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
43
|
-
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
44
|
-
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
45
|
-
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
46
|
-
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
47
|
-
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
48
|
-
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|