daijisen 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (2) hide show
  1. data/lib/daijisen.rb +35 -25
  2. metadata +3 -3
data/lib/daijisen.rb CHANGED
@@ -1,42 +1,52 @@
1
1
  $:.unshift(File.dirname(__FILE__)) unless
2
2
  $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
3
3
 
4
+ =begin
5
+ Yahoo Daijisen Japanese Dictionary Scraper
6
+ Author: Kelly Dunn
7
+ =end
8
+
4
9
  module Daijisen
5
- VERSION = '0.0.2'
6
- require 'rexml/document'
10
+ VERSION = '0.0.3'
11
+ require 'rubygems'
12
+ require 'nokogiri'
7
13
  require 'open-uri'
8
14
  require 'cgi'
9
15
 
16
+ # Query Object.
17
+ # Effectively scrapes The Yahoo Daijisen Dictionary
18
+ # And finds definitions of the Japanese String passed in
19
+ #
20
+ # TODO: Incorporate Shift_JIS encoding. Only UTF-8 works for now.
10
21
  class Query
22
+ attr_accessor :defs, :query
11
23
 
12
24
  def initialize(query)
13
- get_raw_html(query)
25
+ @query = query
26
+ @defs = []
27
+ get_raw_html()
14
28
  end
15
-
16
- # Recursive helper function.
17
- # Grabs all associated data for this particular
18
- # definition of the current query.
19
- def find_def(x, build)
20
- test = x.gets
21
- if !test.include? "</span>"
22
- return test+find_def(x, build)
29
+
30
+ # Scraping function.
31
+ def get_raw_html()
32
+ url = "http://dic.yahoo.co.jp/search?stype=0&ei=UTF-8&dtype=2&p=" + CGI::escape(@query)
33
+ html = Nokogiri::HTML(open(url))
34
+ html.css("span.s115").each do |daiji_def|
35
+ @defs.push(Definition.new(daiji_def))
23
36
  end
24
- return ""
25
37
  end
26
38
 
27
- def get_raw_html(query)
28
- doc = ""
29
- url = "http://dic.yahoo.co.jp/search?stype=0&ei=UTF-8&dtype=2&p=" + CGI::escape(query)
30
- open(url) do |file|
31
- file.each_line do |line|
32
- if line.include? "s115"
33
- doc+=find_def(file, "")
34
- end
35
- end
36
- end
37
- doc
38
- end
39
+ private :get_raw_html
40
+ end
41
+
42
+ # For delicious Ruby Modularity, Definitions will be OOPified.
43
+ class Definition
44
+ attr_accessor :link, :example, :reading
39
45
 
40
- private :find_def
46
+ def initialize(def_html)
47
+ @link = def_html.css("a")[0]['href']
48
+ @reading = def_html.css("a")[0].content
49
+ @example = ""
50
+ end
41
51
  end
42
52
  end
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 0
8
- - 2
9
- version: 0.0.2
8
+ - 3
9
+ version: 0.0.3
10
10
  platform: ruby
11
11
  authors:
12
12
  - Kelly Dunn
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-05-18 00:00:00 -07:00
17
+ date: 2010-05-19 00:00:00 -07:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency