craft 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -1,29 +1,37 @@
1
1
  # Craft
2
2
 
3
- TODO: Write a gem description
3
+ Craft XML and HTML into objects.
4
4
 
5
- ## Installation
5
+ ## Examples
6
+ ```ruby
7
+ require 'craft'
8
+ require 'open-uri'
6
9
 
7
- Add this line to your application's Gemfile:
10
+ class Page < Craft
11
+ # Use CSS selectors
12
+ one :title, 'title'
8
13
 
9
- gem 'craft'
14
+ # Use XPath
15
+ many :links, 'a/@href'
10
16
 
11
- And then execute:
17
+ # Perform transforms on returned nodes
18
+ many :images, 'img', lambda { |img| img.attr('src').upcase }
19
+ end
12
20
 
13
- $ bundle
21
+ page = Page.parse open('http://www.google.com')
14
22
 
15
- Or install it yourself as:
23
+ page.title #=> 'Google'
24
+ page.links #=> ['http://www.google.com/imghp?hl=en&tab=wi', ...]
25
+ page.images #=> ['/LOGOS/2012/MOBY_DICK12-HP.JPG']
16
26
 
17
- $ gem install craft
27
+ class Script < Craft
28
+ one :body, 'text()'
29
+ end
18
30
 
19
- ## Usage
31
+ class Page < Craft
32
+ many :scripts, 'script', Script
33
+ end
20
34
 
21
- TODO: Write usage instructions here
22
-
23
- ## Contributing
24
-
25
- 1. Fork it
26
- 2. Create your feature branch (`git checkout -b my-new-feature`)
27
- 3. Commit your changes (`git commit -am 'Add some feature'`)
28
- 4. Push to the branch (`git push origin my-new-feature`)
29
- 5. Create new Pull Request
35
+ page = Page.parse open('http://www.google.com')
36
+ page.scripts[0].body #=> 'window.google=...'
37
+ ```
data/Rakefile CHANGED
@@ -1 +1,8 @@
1
- require "bundler/gem_tasks"
1
+ require 'bundler/gem_tasks'
2
+ require 'rake/testtask'
3
+
4
+ Rake::TestTask.new do |t|
5
+ t.pattern = "spec/*_spec.rb"
6
+ end
7
+
8
+ task :default => :test
@@ -4,16 +4,19 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
4
  require 'craft/version'
5
5
 
6
6
  Gem::Specification.new do |gem|
7
- gem.name = "craft"
7
+ gem.name = 'craft'
8
8
  gem.version = Craft::VERSION
9
- gem.authors = ["Ezekiel Templin", "Hakan Ensari"]
10
- gem.email = ["code@papercavalier.com"]
11
- gem.description = %q{Data extraction tool}
12
- gem.summary = %q{Data extraction tool}
13
- gem.homepage = "https://github.com/papercavalier/craft"
9
+ gem.authors = ['Ezekiel Templin', 'Hakan Ensari']
10
+ gem.email = ['code@papercavalier.com']
11
+ gem.description = %q{Craft XML into objects}
12
+ gem.summary = %q{Craft is a data extraction tool that crafts objects
13
+ out of HTML and XML.}
14
+ gem.homepage = 'http://papercavalier.com/craft/'
14
15
 
15
16
  gem.files = `git ls-files`.split($/)
16
17
  gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
17
18
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
18
- gem.require_paths = ["lib"]
19
+ gem.require_paths = ['lib']
20
+
21
+ gem.add_dependency 'nokogiri', '~> 1.5'
19
22
  end
@@ -1,5 +1,91 @@
1
- require "craft/version"
1
+ require 'craft/version'
2
+ require 'nokogiri'
2
3
 
3
- module Craft
4
- # Your code goes here...
4
+ # Craft objects out of HTML and XML.
5
+ #
6
+ # Examples
7
+ #
8
+ # module Transformations
9
+ # IntegerTransform = lambda { |n| Integer n.text }
10
+ # end
11
+ #
12
+ # class Person < Craft
13
+ # include Transformations
14
+ #
15
+ # one :name, 'div.name'
16
+ # one :age, 'div.age', IntegerTransform
17
+ # many :friends, 'li.friend', Person
18
+ # end
19
+ #
20
+ class Craft
21
+ class << self
22
+ # We alias call to new so that crafted objects may nest themselves or other
23
+ # crafted objects as transformations.
24
+ alias call new
25
+
26
+ # Define a method that extracts a collection of values from a parsed
27
+ # document.
28
+ #
29
+ # name - The Symbol name of the method.
30
+ # paths - One or more String XPath or CSS queries. An optional Proc
31
+ # transformation on the extracted value may be appended. If none is
32
+ # appended, the default transformation returns the stripped String
33
+ # value of the node.
34
+ #
35
+ # Returns an Array.
36
+ def many(name, *paths)
37
+ transform = pop_transformation paths
38
+
39
+ define_method name do
40
+ @node.search(*paths).map { |node| transform.call node }
41
+ end
42
+ end
43
+
44
+ # Define a method that extracts a single value from a parsed document.
45
+ #
46
+ # name - The Symbol name of the method.
47
+ # paths - One or more String XPath or CSS queries. An optional Proc
48
+ # transformation on the extracted value may be appended. If none is
49
+ # appended, the default transformation returns the stripped String
50
+ # value of the node.
51
+ #
52
+ # Returns an Object.
53
+ def one(name, *paths)
54
+ transform = pop_transformation paths
55
+
56
+ define_method name do
57
+ transform.call @node.at(*paths)
58
+ end
59
+ end
60
+
61
+ # Parse a document.
62
+ #
63
+ # body - A String HTML or XML document.
64
+ #
65
+ # Returns an instance of itself.
66
+ def parse(body)
67
+ new Nokogiri body
68
+ end
69
+
70
+ private
71
+
72
+ def pop_transformation(array)
73
+ if array.last.respond_to? :call
74
+ array.pop
75
+ else
76
+ Module.new do
77
+ def self.call(node)
78
+ node.text.strip if node
79
+ end
80
+ end
81
+ end
82
+ end
83
+ end
84
+
85
+ # Craft a new object.
86
+ #
87
+ # node - A Nokogiri::XML::Node.
88
+ def initialize(node)
89
+ @node = node
90
+ end
5
91
  end
@@ -1,3 +1,3 @@
1
- module Craft
2
- VERSION = "0.0.1"
1
+ class Craft
2
+ VERSION = '0.0.2'
3
3
  end
@@ -0,0 +1,58 @@
1
+ require 'bundler/setup'
2
+ require 'minitest/autorun'
3
+ require 'craft'
4
+
5
+ describe Craft do
6
+ let :html do
7
+ '<html><ul><li>1</li><li>2</li>'
8
+ end
9
+
10
+ let :klass do
11
+ Class.new Craft
12
+ end
13
+
14
+ let :instance do
15
+ klass.parse html
16
+ end
17
+
18
+ describe '.many' do
19
+ it 'extracts nodes' do
20
+ klass.many 'foo', 'li'
21
+ instance.foo.must_equal %w(1 2)
22
+ end
23
+
24
+ it 'transforms' do
25
+ klass.many 'foo', 'li', ->(node) { node.text.to_i }
26
+ instance.foo.must_equal [1, 2]
27
+ end
28
+ end
29
+
30
+ describe '.one' do
31
+ it 'extracts a node' do
32
+ klass.one 'foo', 'li'
33
+ instance.foo.must_equal '1'
34
+ end
35
+
36
+ it 'transforms' do
37
+ klass.one 'foo', 'li', ->(node) { node.text.to_i }
38
+ instance.foo.must_equal 1
39
+ end
40
+
41
+ describe 'given no matches' do
42
+ before do
43
+ klass.one 'foo', 'foo'
44
+ end
45
+
46
+ it 'returns nil' do
47
+ instance.foo.must_be_nil
48
+ end
49
+ end
50
+ end
51
+
52
+ it 'nests' do
53
+ nest = Class.new Craft
54
+ nest.many 'foo', 'li'
55
+ klass.one 'foo', 'ul', nest
56
+ instance.foo.foo.must_equal %w(1 2)
57
+ end
58
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: craft
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -10,9 +10,25 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2012-10-12 00:00:00.000000000 Z
14
- dependencies: []
15
- description: Data extraction tool
13
+ date: 2012-10-18 00:00:00.000000000 Z
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: nokogiri
17
+ requirement: !ruby/object:Gem::Requirement
18
+ none: false
19
+ requirements:
20
+ - - ~>
21
+ - !ruby/object:Gem::Version
22
+ version: '1.5'
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ none: false
27
+ requirements:
28
+ - - ~>
29
+ - !ruby/object:Gem::Version
30
+ version: '1.5'
31
+ description: Craft XML into objects
16
32
  email:
17
33
  - code@papercavalier.com
18
34
  executables: []
@@ -27,7 +43,8 @@ files:
27
43
  - craft.gemspec
28
44
  - lib/craft.rb
29
45
  - lib/craft/version.rb
30
- homepage: https://github.com/papercavalier/craft
46
+ - spec/craft_spec.rb
47
+ homepage: http://papercavalier.com/craft/
31
48
  licenses: []
32
49
  post_install_message:
33
50
  rdoc_options: []
@@ -39,17 +56,23 @@ required_ruby_version: !ruby/object:Gem::Requirement
39
56
  - - ! '>='
40
57
  - !ruby/object:Gem::Version
41
58
  version: '0'
59
+ segments:
60
+ - 0
61
+ hash: -3848600058118697198
42
62
  required_rubygems_version: !ruby/object:Gem::Requirement
43
63
  none: false
44
64
  requirements:
45
65
  - - ! '>='
46
66
  - !ruby/object:Gem::Version
47
67
  version: '0'
68
+ segments:
69
+ - 0
70
+ hash: -3848600058118697198
48
71
  requirements: []
49
72
  rubyforge_project:
50
73
  rubygems_version: 1.8.23
51
74
  signing_key:
52
75
  specification_version: 3
53
- summary: Data extraction tool
54
- test_files: []
55
- has_rdoc:
76
+ summary: Craft is a data extraction tool that crafts objects out of HTML and XML.
77
+ test_files:
78
+ - spec/craft_spec.rb