daijisen 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. data/lib/daijisen.rb +35 -25
  2. metadata +3 -3
data/lib/daijisen.rb CHANGED
@@ -1,42 +1,52 @@
1
1
  $:.unshift(File.dirname(__FILE__)) unless
2
2
  $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
3
3
 
4
+ =begin
5
+ Yahoo Daijisen Japanese Dictionary Scraper
6
+ Author: Kelly Dunn
7
+ =end
8
+
4
9
  module Daijisen
5
- VERSION = '0.0.2'
6
- require 'rexml/document'
10
+ VERSION = '0.0.3'
11
+ require 'rubygems'
12
+ require 'nokogiri'
7
13
  require 'open-uri'
8
14
  require 'cgi'
9
15
 
16
+ # Query Object.
17
+ # Effectively scrapes The Yahoo Daijisen Dictionary
18
+ # And finds definitions of the Japanese String passed in
19
+ #
20
+ # TODO: Incorporate Shift_JIS encoding. Only UTF-8 works for now.
10
21
  class Query
22
+ attr_accessor :defs, :query
11
23
 
12
24
  def initialize(query)
13
- get_raw_html(query)
25
+ @query = query
26
+ @defs = []
27
+ get_raw_html()
14
28
  end
15
-
16
- # Recursive helper function.
17
- # Grabs all associated data for this particular
18
- # definition of the current query.
19
- def find_def(x, build)
20
- test = x.gets
21
- if !test.include? "</span>"
22
- return test+find_def(x, build)
29
+
30
+ # Scraping function.
31
+ def get_raw_html()
32
+ url = "http://dic.yahoo.co.jp/search?stype=0&ei=UTF-8&dtype=2&p=" + CGI::escape(@query)
33
+ html = Nokogiri::HTML(open(url))
34
+ html.css("span.s115").each do |daiji_def|
35
+ @defs.push(Definition.new(daiji_def))
23
36
  end
24
- return ""
25
37
  end
26
38
 
27
- def get_raw_html(query)
28
- doc = ""
29
- url = "http://dic.yahoo.co.jp/search?stype=0&ei=UTF-8&dtype=2&p=" + CGI::escape(query)
30
- open(url) do |file|
31
- file.each_line do |line|
32
- if line.include? "s115"
33
- doc+=find_def(file, "")
34
- end
35
- end
36
- end
37
- doc
38
- end
39
+ private :get_raw_html
40
+ end
41
+
42
+ # For delicious Ruby Modularity, Definitions will be OOPified.
43
+ class Definition
44
+ attr_accessor :link, :example, :reading
39
45
 
40
- private :find_def
46
+ def initialize(def_html)
47
+ @link = def_html.css("a")[0]['href']
48
+ @reading = def_html.css("a")[0].content
49
+ @example = ""
50
+ end
41
51
  end
42
52
  end
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 0
8
- - 2
9
- version: 0.0.2
8
+ - 3
9
+ version: 0.0.3
10
10
  platform: ruby
11
11
  authors:
12
12
  - Kelly Dunn
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-05-18 00:00:00 -07:00
17
+ date: 2010-05-19 00:00:00 -07:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency