kudzu 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52):
  1. checksums.yaml +4 -4
  2. data/lib/kudzu/adapter/memory/frontier.rb +1 -1
  3. data/lib/kudzu/adapter/memory/model/link.rb +2 -6
  4. data/lib/kudzu/adapter/memory/model/page.rb +3 -8
  5. data/lib/kudzu/adapter/memory/repository.rb +0 -2
  6. data/lib/kudzu/adapter/memory.rb +3 -4
  7. data/lib/kudzu/agent/all.rb +1 -1
  8. data/lib/kudzu/agent/fetcher.rb +46 -49
  9. data/lib/kudzu/agent/http/connection.rb +9 -0
  10. data/lib/kudzu/agent/http/connection_pool.rb +50 -0
  11. data/lib/kudzu/agent/page_filterer.rb +58 -0
  12. data/lib/kudzu/agent/reference.rb +9 -0
  13. data/lib/kudzu/agent/response.rb +14 -0
  14. data/lib/kudzu/agent/robots/parser.rb +91 -0
  15. data/lib/kudzu/agent/robots/txt.rb +34 -0
  16. data/lib/kudzu/agent/robots.rb +12 -123
  17. data/lib/kudzu/agent/sleeper.rb +2 -2
  18. data/lib/kudzu/agent/url_extractor.rb +60 -46
  19. data/lib/kudzu/agent/{url_filter.rb → url_filterer.rb} +26 -13
  20. data/lib/kudzu/agent/util/charset_detector.rb +84 -0
  21. data/lib/kudzu/agent/util/content_type_parser.rb +28 -0
  22. data/lib/kudzu/agent/util/matcher.rb +25 -0
  23. data/lib/kudzu/agent/util/mime_type_detector.rb +38 -0
  24. data/lib/kudzu/agent/util/title_parser.rb +30 -0
  25. data/lib/kudzu/agent.rb +42 -0
  26. data/lib/kudzu/callback.rb +4 -2
  27. data/lib/kudzu/config/filter.rb +11 -11
  28. data/lib/kudzu/config.rb +20 -25
  29. data/lib/kudzu/crawler.rb +65 -146
  30. data/lib/kudzu/{adapter/base → model}/all.rb +0 -0
  31. data/lib/kudzu/model/base.rb +9 -0
  32. data/lib/kudzu/model/link.rb +9 -0
  33. data/lib/kudzu/model/page.rb +112 -0
  34. data/lib/kudzu/thread_pool.rb +36 -0
  35. data/lib/kudzu/version.rb +1 -1
  36. data/lib/kudzu.rb +21 -3
  37. metadata +21 -19
  38. data/lib/kudzu/adapter/base/link.rb +0 -8
  39. data/lib/kudzu/adapter/base/page.rb +0 -106
  40. data/lib/kudzu/adapter/memory/all.rb +0 -3
  41. data/lib/kudzu/agent/charset_detector.rb +0 -84
  42. data/lib/kudzu/agent/filter.rb +0 -40
  43. data/lib/kudzu/agent/mime_type_detector.rb +0 -34
  44. data/lib/kudzu/agent/title_parser.rb +0 -16
  45. data/lib/kudzu/logger.rb +0 -20
  46. data/lib/kudzu/revisit/all.rb +0 -3
  47. data/lib/kudzu/revisit/scheduler.rb +0 -28
  48. data/lib/kudzu/util/all.rb +0 -3
  49. data/lib/kudzu/util/connection_pool.rb +0 -56
  50. data/lib/kudzu/util/content_type_parser.rb +0 -24
  51. data/lib/kudzu/util/matcher.rb +0 -21
  52. data/lib/kudzu/util/thread_pool.rb +0 -38
@@ -1,5 +1,3 @@
1
- require 'nokogiri'
2
-
3
1
  module Kudzu
4
2
  class Agent
5
3
  class UrlExtractor
@@ -7,25 +5,32 @@ module Kudzu
7
5
  @config = config
8
6
  end
9
7
 
10
- def extract(page, base_url)
11
- hrefs = if page.html?
12
- FromHTML.new(@config).extract(page)
13
- elsif page.xml?
14
- FromXML.new(@config).extract(page)
15
- else
16
- []
17
- end
18
-
19
- hrefs.select do |href|
20
- href[:url] = normalize(href[:url], base_url)
8
+ def extract(response)
9
+ refs = if response.html?
10
+ ForHTML.new(@config).extract(response)
11
+ elsif response.xml?
12
+ ForXML.new(@config).extract(response)
13
+ else
14
+ []
15
+ end
16
+
17
+ refs.each do |ref|
18
+ ref.url = sanitize(ref.url)
19
+ ref.url = normalize(ref.url, response.url)
21
20
  end
21
+ refs.reject { |ref| ref.url.nil? }.uniq
22
22
  end
23
23
 
24
24
  private
25
25
 
26
+ def sanitize(url)
27
+ url.gsub(/^( | |%20)+/, '')
28
+ end
29
+
26
30
  def normalize(url, base_url)
27
- uri = Addressable::URI.parse(base_url.to_s).join(url.to_s).normalize
31
+ uri = Addressable::URI.parse(base_url).join(url).normalize
28
32
  uri.path = '/' unless uri.path
33
+ uri.path = uri.path.gsub(%r|/{2,}|, '/')
29
34
  uri.fragment = nil
30
35
 
31
36
  if uri.scheme.in?(%w(http https))
@@ -33,35 +38,37 @@ module Kudzu
33
38
  else
34
39
  nil
35
40
  end
41
+ rescue => e
42
+ Kudzu.log :warn, "failed to normalize url: #{url}", error: e
43
+ nil
36
44
  end
37
45
 
38
- class FromHTML < UrlExtractor
46
+ class ForHTML
39
47
  def initialize(config)
40
- super
41
- @content_type_parser = Kudzu::Util::ContentTypeParser.new
48
+ @config = config
42
49
  end
43
50
 
44
- def extract(page)
45
- doc = Nokogiri::HTML(page.decoded_body)
46
- return [] if nofollow?(doc)
51
+ def extract(response)
52
+ doc = response.parsed_doc
53
+ return [] if @config.respect_nofollow && nofollow?(doc)
47
54
 
48
- if (filter = @config.find_filter(page.url))
55
+ if (filter = @config.find_filter(response.url))
49
56
  if filter.allow_element
50
57
  doc = doc.search(*Array(filter.allow_element))
51
58
  end
52
59
  if filter.deny_element
60
+ doc = doc.dup
53
61
  doc.search(*Array(filter.deny_element)).remove
54
62
  end
55
63
  end
56
64
 
57
- hrefs = from_html(doc) + from_html_in_meta(doc)
58
- hrefs.reject { |href| href[:url].empty? }.uniq
65
+ refs = from_html(doc) + from_meta(doc)
66
+ refs.reject { |ref| ref.url.nil? || ref.url.empty? }
59
67
  end
60
68
 
61
69
  private
62
70
 
63
71
  def nofollow?(doc)
64
- return false unless @config.respect_nofollow
65
72
  nodes = doc.xpath('//meta[@name]')
66
73
  nodes.any? { |node| node[:name] =~ /^robots$/i && node[:content] =~ /nofollow/i }
67
74
  end
@@ -73,10 +80,10 @@ module Kudzu
73
80
  nodes.reject! { |url| url[:rel] =~ /nofollow/i }
74
81
  end
75
82
 
76
- nodes.map { |node|
77
- { url: (node[:href] || node[:src]).to_s.strip,
78
- title: node_to_title(node) }
79
- }
83
+ nodes.map do |node|
84
+ Reference.new(url: (node[:href] || node[:src]).to_s,
85
+ title: node_to_title(node))
86
+ end
80
87
  end
81
88
 
82
89
  def node_to_title(node)
@@ -87,35 +94,42 @@ module Kudzu
87
94
  end
88
95
  end
89
96
 
90
- def from_html_in_meta(doc)
97
+ def from_meta(doc)
91
98
  nodes = doc.xpath('.//meta[@http-equiv]').select { |node| node[:'http-equiv'] =~ /^refresh$/i }
92
- urls = nodes.map { |node| @content_type_parser.parse(node[:content]).last[:url] }.compact
93
- urls.map { |url| { url: url.to_s.strip } }
99
+ urls = nodes.map { |node| Util::ContentTypeParser.parse(node[:content]).last[:url] }.compact
100
+ urls.map do |url|
101
+ Reference.new(url: url.to_s)
102
+ end
94
103
  end
95
104
  end
96
105
 
97
- class FromXML < UrlExtractor
98
- def extract(page)
99
- doc = Nokogiri::XML(page.decoded_body)
106
+ class ForXML
107
+ def initialize(config)
108
+ @config = config
109
+ end
110
+
111
+ def extract(response)
112
+ doc = response.parsed_doc.dup
100
113
  doc.remove_namespaces!
101
- hrefs = from_xml_rss(doc) + from_xml_atom(doc)
102
- hrefs.reject { |href| href[:url].empty? }.uniq
114
+
115
+ refs = from_rss(doc) + from_atom(doc)
116
+ refs.reject { |ref| ref.url.nil? || ref.url.empty? }
103
117
  end
104
118
 
105
119
  private
106
120
 
107
- def from_xml_rss(doc)
108
- doc.xpath('rss/channel').map { |node|
109
- { url: node.xpath('./item/link').inner_text.strip,
110
- title: node.xpath('./item/title').inner_text }
111
- }
121
+ def from_rss(doc)
122
+ doc.xpath('rss/channel').map do |node|
123
+ Reference.new(url: node.xpath('./item/link').inner_text,
124
+ title: node.xpath('./item/title').inner_text)
125
+ end
112
126
  end
113
127
 
114
- def from_xml_atom(doc)
115
- doc.xpath('feed/entry').map { |node|
116
- { url: node.xpath('./link[@href]/@href').to_s.strip,
117
- title: node.xpath('./title').inner_text }
118
- }
128
+ def from_atom(doc)
129
+ doc.xpath('feed/entry').map do |node|
130
+ Reference.new(url: node.xpath('./link[@href]/@href').to_s,
131
+ title: node.xpath('./title').inner_text)
132
+ end
119
133
  end
120
134
  end
121
135
  end
@@ -1,22 +1,28 @@
1
1
  module Kudzu
2
2
  class Agent
3
- class UrlFilter
4
- def initialize(config)
3
+ class UrlFilterer
4
+ def initialize(config, robots = nil)
5
5
  @config = config
6
- @matcher = Kudzu::Util::Matcher.new
6
+ @robots = robots
7
7
  end
8
8
 
9
- def filter(hrefs, base_url)
9
+ def filter(refs, base_url)
10
10
  base_uri = Addressable::URI.parse(base_url)
11
11
  filter = @config.find_filter(base_uri)
12
12
 
13
- hrefs.partition do |href|
14
- allowed?(href[:url], base_uri, filter: filter)
13
+ refs.select do |ref|
14
+ if allowed?(ref.uri, base_uri, filter: filter)
15
+ Kudzu.log :debug, "passed url: #{ref.url}"
16
+ true
17
+ else
18
+ Kudzu.log :debug, "dropped url: #{ref.url}"
19
+ false
20
+ end
15
21
  end
16
22
  end
17
23
 
18
- def allowed?(url, base_uri, filter: nil)
19
- uri = Addressable::URI.parse(url)
24
+ def allowed?(uri, base_uri, filter: nil)
25
+ uri = Addressable::URI.parse(uri) if uri.is_a?(String)
20
26
  base_uri = Addressable::URI.parse(base_uri) if base_uri.is_a?(String)
21
27
  filter ||= @config.find_filter(base_uri)
22
28
  return true unless filter
@@ -26,7 +32,8 @@ module Kudzu
26
32
  allowed_url?(uri, filter) &&
27
33
  allowed_host?(uri, filter) &&
28
34
  allowed_path?(uri, filter) &&
29
- allowed_ext?(uri, filter)
35
+ allowed_ext?(uri, filter) &&
36
+ allowed_by_robots?(uri)
30
37
  end
31
38
 
32
39
  private
@@ -44,21 +51,27 @@ module Kudzu
44
51
  end
45
52
 
46
53
  def allowed_url?(uri, filter)
47
- @matcher.match?(uri.to_s, allows: filter.allow_url, denies: filter.deny_url)
54
+ Util::Matcher.match?(uri.to_s, allows: filter.allow_url, denies: filter.deny_url)
48
55
  end
49
56
 
50
57
  def allowed_host?(uri, filter)
51
- @matcher.match?(uri.host, allows: filter.allow_host, denies: filter.deny_host)
58
+ Util::Matcher.match?(uri.host, allows: filter.allow_host, denies: filter.deny_host)
52
59
  end
53
60
 
54
61
  def allowed_path?(uri, filter)
55
- @matcher.match?(uri.path, allows: filter.allow_path, denies: filter.deny_path)
62
+ Util::Matcher.match?(uri.path, allows: filter.allow_path, denies: filter.deny_path)
56
63
  end
57
64
 
58
65
  def allowed_ext?(uri, filter)
59
66
  ext = uri.extname.to_s.sub(/^\./, '')
60
67
  return true if ext.empty?
61
- @matcher.match?(ext, allows: filter.allow_ext, denies: filter.deny_ext)
68
+ Util::Matcher.match?(ext, allows: filter.allow_ext, denies: filter.deny_ext)
69
+ end
70
+
71
+ def allowed_by_robots?(uri)
72
+ return true unless @robots
73
+ return true unless @config.respect_robots_txt
74
+ @robots.allowed?(uri)
62
75
  end
63
76
  end
64
77
  end
@@ -0,0 +1,84 @@
1
+ module Kudzu
2
+ class Agent
3
+ class Util
4
+ class CharsetDetector
5
+ CORRECTION = {
6
+ 'utf_8' => 'utf-8',
7
+ 'shift-jis' => 'shift_jis',
8
+ 'x-sjis' => 'shift_jis',
9
+ 'euc_jp' => 'euc-jp'
10
+ }
11
+
12
+ class << self
13
+ def detect(response)
14
+ if response.html?
15
+ from_html(response.body) || from_text(response.body)
16
+ elsif response.xml?
17
+ from_xml(response.body) || from_text(response.body)
18
+ elsif response.text?
19
+ from_text(response.body)
20
+ end
21
+ rescue => e
22
+ Kudzu.log :warn, "failed to detect charset: #{response.url}", error: e
23
+ nil
24
+ end
25
+
26
+ private
27
+
28
+ def from_html(body)
29
+ doc = Nokogiri::HTML(body.encode('utf-8', undef: :replace, invalid: :replace))
30
+
31
+ if (node = doc.xpath('//meta/@charset').first)
32
+ charset = correct(node.to_s)
33
+ return charset if charset
34
+ end
35
+
36
+ doc.xpath('//meta[@http-equiv]').each do |meta|
37
+ if meta['http-equiv'] =~ /content-type/i
38
+ charset = ContentTypeParser.parse(meta[:content].to_s)[1][:charset]
39
+ charset = correct(node.to_s)
40
+ return charset if charset
41
+ end
42
+ end
43
+
44
+ return nil
45
+ end
46
+
47
+ def from_xml(body)
48
+ doc = Nokogiri::XML(body.encode('utf-8', undef: :replace, invalid: :replace))
49
+ if doc.encoding
50
+ correct(doc.encoding)
51
+ else
52
+ nil
53
+ end
54
+ end
55
+
56
+ def from_text(text)
57
+ if text.ascii_only?
58
+ 'ascii'
59
+ else
60
+ detection = CharlockHolmes::EncodingDetector.detect(text)
61
+ if detection && detection.key?(:encoding)
62
+ detection[:encoding].downcase
63
+ else
64
+ nil
65
+ end
66
+ end
67
+ end
68
+
69
+ def correct(charset)
70
+ charset = charset.downcase
71
+ charset = CORRECTION[charset] if CORRECTION.key?(charset)
72
+
73
+ begin
74
+ Encoding.find(charset)
75
+ rescue
76
+ charset = nil
77
+ end
78
+ charset
79
+ end
80
+ end
81
+ end
82
+ end
83
+ end
84
+ end
@@ -0,0 +1,28 @@
1
+ module Kudzu
2
+ class Agent
3
+ class Util
4
+ class ContentTypeParser
5
+ class << self
6
+ def parse(content_type)
7
+ mime, *kvs = content_type.to_s.split(';').map { |str| str.strip.downcase }
8
+ params = kvs.each_with_object({}) do |kv, hash|
9
+ k, v = kv.to_s.split('=').map { |str| str.strip }
10
+ hash[k.to_sym] = unquote(v) if k && v
11
+ end
12
+ return mime, params
13
+ end
14
+
15
+ private
16
+
17
+ def unquote(str)
18
+ if str =~ /^"(.*?)"$/
19
+ $1.gsub(/\\(.)/, '\1')
20
+ else
21
+ str
22
+ end
23
+ end
24
+ end
25
+ end
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,25 @@
1
+ module Kudzu
2
+ class Agent
3
+ class Util
4
+ class Matcher
5
+ class << self
6
+ def match?(text, allows: nil, denies: nil)
7
+ match_to_allows?(text, allows) && !match_to_denies?(text, denies)
8
+ end
9
+
10
+ private
11
+
12
+ def match_to_allows?(text, allows)
13
+ allows = Array(allows)
14
+ allows.empty? || allows.any? { |allow| Kudzu::Common.match?(text, allow) }
15
+ end
16
+
17
+ def match_to_denies?(text, denies)
18
+ denies = Array(denies)
19
+ !denies.empty? && denies.any? { |deny| Kudzu::Common.match?(text, deny) }
20
+ end
21
+ end
22
+ end
23
+ end
24
+ end
25
+ end
@@ -0,0 +1,38 @@
1
+ module Kudzu
2
+ class Agent
3
+ class Util
4
+ class MimeTypeDetector
5
+ DEFALUT_MIME_TYPE = 'application/octet-stream'
6
+
7
+ class << self
8
+ def detect(response)
9
+ from_header(response.response_header) ||
10
+ from_body(response.body) ||
11
+ from_url(response.url) ||
12
+ DEFALUT_MIME_TYPE
13
+ rescue => e
14
+ Kudzu.log :warn, "failed to detect mime: #{response.url}", error: e
15
+ nil
16
+ end
17
+
18
+ private
19
+
20
+ def from_header(header)
21
+ ContentTypeParser.parse(header['content-type']).first
22
+ end
23
+
24
+ def from_body(body)
25
+ mime = MIME.check_magics(StringIO.new(body))
26
+ mime.to_s if mime
27
+ end
28
+
29
+ def from_url(url)
30
+ uri = Addressable::URI.parse(url)
31
+ mime = MIME.check_globs(uri.basename)
32
+ mime.to_s if mime
33
+ end
34
+ end
35
+ end
36
+ end
37
+ end
38
+ end
@@ -0,0 +1,30 @@
1
+ module Kudzu
2
+ class Agent
3
+ class Util
4
+ class TitleParser
5
+ class << self
6
+ def parse(response)
7
+ if response.html?
8
+ from_html(response.parsed_doc)
9
+ else
10
+ Addressable::URI.parse(response.url).basename
11
+ end
12
+ rescue => e
13
+ Kudzu.log :warn, "failed to parse title: #{response.url}", error: e
14
+ nil
15
+ end
16
+
17
+ private
18
+
19
+ def from_html(doc)
20
+ if (node = doc.xpath('//head/title').first)
21
+ node.inner_text.to_s
22
+ else
23
+ ''
24
+ end
25
+ end
26
+ end
27
+ end
28
+ end
29
+ end
30
+ end
@@ -0,0 +1,42 @@
1
+ require_relative 'agent/all'
2
+
3
+ module Kudzu
4
+ class Agent
5
+ def initialize(config, &block)
6
+ @config = config
7
+
8
+ @robots = Robots.new(@config)
9
+ @fetcher = Fetcher.new(@config, @robots)
10
+ @url_extractor = UrlExtractor.new(@config)
11
+ @url_filterer = UrlFilterer.new(@config, @robots)
12
+ @page_filterer = PageFilterer.new(@config)
13
+ end
14
+
15
+ def start
16
+ yield
17
+ @fetcher.pool.close
18
+ end
19
+
20
+ def fetch(url, request_header = {})
21
+ response = @fetcher.fetch(url, request_header: request_header)
22
+ return response unless response.fetched?
23
+
24
+ response.size = response.body.size
25
+ response.digest = Digest::MD5.hexdigest(response.body)
26
+ response.mime_type = Util::MimeTypeDetector.detect(response)
27
+ response.charset = Util::CharsetDetector.detect(response) if response.text?
28
+ response.title = Util::TitleParser.parse(response)
29
+ response
30
+ end
31
+
32
+ def extract_refs(response)
33
+ refs = @url_extractor.extract(response)
34
+ @url_filterer.filter(refs, response.url)
35
+ end
36
+
37
+ def filter_response?(response)
38
+ return false if response.redirect_from && !@url_filterer.allowed?(response.url, response.redirect_from)
39
+ !@page_filterer.allowed?(response)
40
+ end
41
+ end
42
+ end
@@ -6,12 +6,14 @@ module Kudzu
6
6
  :on_server_error, # 5xx
7
7
  :on_filter, # 2xx, filtered
8
8
  :on_failure, # Exception
9
+ :before_enqueue,
10
+ :after_enqueue,
11
+ :before_fetch,
12
+ :after_fetch,
9
13
  :before_register,
10
14
  :after_register,
11
15
  :before_delete,
12
16
  :after_delete,
13
- :before_enqueue,
14
- :after_enqueue,
15
17
  ]
16
18
 
17
19
  def initialize(&block)
@@ -13,27 +13,27 @@ module Kudzu
13
13
  attr_accessor :path
14
14
  attr_accessor *SIMPLE_CONFIGS
15
15
 
16
+ def initialize(path, config = {}, &block)
17
+ @path = path
18
+ DEFAULT_CONFIG.merge(config).each do |key, value|
19
+ send("#{key}=", value)
20
+ end
21
+ if block
22
+ Delegator.new(self).instance_eval(&block)
23
+ end
24
+ end
25
+
16
26
  class Delegator
17
27
  def initialize(filter)
18
28
  @filter = filter
19
29
  end
20
30
 
21
- Kudzu::Config::Filter::SIMPLE_CONFIGS.each do |key|
31
+ SIMPLE_CONFIGS.each do |key|
22
32
  define_method(key) do |value|
23
33
  @filter.send("#{key}=", value)
24
34
  end
25
35
  end
26
36
  end
27
-
28
- def initialize(path, config = {}, &block)
29
- @path = path
30
- DEFAULT_CONFIG.merge(config).each do |key, value|
31
- send("#{key}=", value)
32
- end
33
- if block
34
- Kudzu::Config::Filter::Delegator.new(self).instance_eval(&block)
35
- end
36
- end
37
37
  end
38
38
  end
39
39
  end
data/lib/kudzu/config.rb CHANGED
@@ -4,16 +4,15 @@ require_relative 'config/filter'
4
4
  module Kudzu
5
5
  class Config
6
6
  SIMPLE_CONFIGS = [:config_file,
7
- :user_agent, :thread_num, :open_timeout, :read_timeout,
7
+ :user_agent, :thread_num, :open_timeout, :read_timeout, :keep_alive,
8
8
  :max_connection, :max_redirect, :max_depth, :default_request_header,
9
9
  :politeness_delay, :handle_cookie,
10
10
  :respect_robots_txt, :respect_nofollow, :respect_noindex,
11
- :log_file, :log_level,
12
- :revisit_mode, :revisit_min_interval, :revisit_max_interval, :revisit_default_interval,
13
11
  :filters]
14
12
  DEFAULT_CONFIG = { user_agent: "Kudzu/#{Kudzu::VERSION}",
15
13
  open_timeout: 10,
16
14
  read_timeout: 10,
15
+ keep_alive: 5,
17
16
  thread_num: 1,
18
17
  max_connection: 10,
19
18
  max_redirect: 3,
@@ -21,37 +20,17 @@ module Kudzu
21
20
  handle_cookie: true,
22
21
  respect_robots_txt: true,
23
22
  respect_nofollow: true,
24
- respect_noindex: true,
25
- revisit_mode: false,
26
- revisit_min_interval: 1,
27
- revisit_max_interval: 10,
28
- revisit_default_interval: 5 }
23
+ respect_noindex: true }
29
24
 
30
25
  attr_accessor *SIMPLE_CONFIGS
31
26
 
32
- class Delegator
33
- def initialize(config)
34
- @config = config
35
- end
36
-
37
- Kudzu::Config::SIMPLE_CONFIGS.each do |key|
38
- define_method(key) do |value|
39
- @config.send("#{key}=", value)
40
- end
41
- end
42
-
43
- def add_filter(base_url = nil, config = {}, &block)
44
- @config.add_filter(base_url, config, &block)
45
- end
46
- end
47
-
48
27
  def initialize(config = {}, &block)
49
28
  self.filters = {}
50
29
  DEFAULT_CONFIG.merge(config).each do |key, value|
51
30
  send("#{key}=", value)
52
31
  end
53
32
  if config_file || block
54
- delegator = Kudzu::Config::Delegator.new(self)
33
+ delegator = Delegator.new(self)
55
34
  delegator.instance_eval(File.read(config_file)) if config_file
56
35
  delegator.instance_eval(&block) if block
57
36
  end
@@ -75,5 +54,21 @@ module Kudzu
75
54
  end
76
55
  nil
77
56
  end
57
+
58
+ class Delegator
59
+ def initialize(config)
60
+ @config = config
61
+ end
62
+
63
+ SIMPLE_CONFIGS.each do |key|
64
+ define_method(key) do |value|
65
+ @config.send("#{key}=", value)
66
+ end
67
+ end
68
+
69
+ def add_filter(base_url = nil, config = {}, &block)
70
+ @config.add_filter(base_url, config, &block)
71
+ end
72
+ end
78
73
  end
79
74
  end