daijisen 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/daijisen.rb +35 -25
- metadata +3 -3
data/lib/daijisen.rb
CHANGED
@@ -1,42 +1,52 @@
|
|
1
1
|
$:.unshift(File.dirname(__FILE__)) unless
|
2
2
|
$:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
|
3
3
|
|
4
|
+
=begin
|
5
|
+
Yahoo Daijisen Japanese Dictionary Scraper
|
6
|
+
Author: Kelly Dunn
|
7
|
+
=end
|
8
|
+
|
4
9
|
module Daijisen
|
5
|
-
VERSION = '0.0.2'
|
6
|
-
require '
|
10
|
+
VERSION = '0.0.3'
|
11
|
+
require 'rubygems'
|
12
|
+
require 'nokogiri'
|
7
13
|
require 'open-uri'
|
8
14
|
require 'cgi'
|
9
15
|
|
16
|
+
# Query Object.
|
17
|
+
# Effectively scrapes The Yahoo Daijisen Dictionary
|
18
|
+
# And finds definitions of the Japanese String passed in
|
19
|
+
#
|
20
|
+
# TODO: Incorporate SHIFT_JS encoding. Only UTF-8 works for now.
|
10
21
|
class Query
|
22
|
+
attr_accessor :defs, :query
|
11
23
|
|
12
24
|
def initialize(query)
|
13
|
-
|
25
|
+
@query = query
|
26
|
+
@defs = []
|
27
|
+
get_raw_html()
|
14
28
|
end
|
15
|
-
|
16
|
-
#
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
return test+find_def(x, build)
|
29
|
+
|
30
|
+
# Scraping function.
|
31
|
+
def get_raw_html()
|
32
|
+
url = "http://dic.yahoo.co.jp/search?stype=0&ei=UTF-8&dtype=2&p=" + CGI::escape(@query)
|
33
|
+
html = Nokogiri::HTML(open(url))
|
34
|
+
html.css("span.s115").each do |daiji_def|
|
35
|
+
@defs.push(Definition.new(daiji_def))
|
23
36
|
end
|
24
|
-
return ""
|
25
37
|
end
|
26
38
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
doc+=find_def(file, "")
|
34
|
-
end
|
35
|
-
end
|
36
|
-
end
|
37
|
-
doc
|
38
|
-
end
|
39
|
+
private :get_raw_html
|
40
|
+
end
|
41
|
+
|
42
|
+
# For delicious Ruby Modularity, Definitions will be OOPified.
|
43
|
+
class Definition
|
44
|
+
attr_accessor :link, :example, :reading
|
39
45
|
|
40
|
-
|
46
|
+
def initialize(def_html)
|
47
|
+
@link = def_html.css("a")[0]['href']
|
48
|
+
@reading = def_html.css("a")[0].content
|
49
|
+
@example = ""
|
50
|
+
end
|
41
51
|
end
|
42
52
|
end
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 0
|
8
|
-
- 2
|
9
|
-
version: 0.0.2
|
8
|
+
- 3
|
9
|
+
version: 0.0.3
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Kelly Dunn
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-05-
|
17
|
+
date: 2010-05-19 00:00:00 -07:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|