craft 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -1,29 +1,37 @@
1
1
  # Craft
2
2
 
3
- TODO: Write a gem description
3
+ Craft XML and HTML into objects.
4
4
 
5
- ## Installation
5
+ ## Examples
6
+ ```ruby
7
+ require 'craft'
8
+ require 'open-uri'
6
9
 
7
- Add this line to your application's Gemfile:
10
+ class Page < Craft
11
+ # Use CSS selectors
12
+ one :title, 'title'
8
13
 
9
- gem 'craft'
14
+ # Use XPath
15
+ many :links, 'a/@href'
10
16
 
11
- And then execute:
17
+ # Perform transforms on returned nodes
18
+ many :images, 'img', lambda { |img| img.attr('src').upcase }
19
+ end
12
20
 
13
- $ bundle
21
+ page = Page.parse open('http://www.google.com')
14
22
 
15
- Or install it yourself as:
23
+ page.title #=> 'Google'
24
+ page.links #=> ['http://www.google.com/imghp?hl=en&tab=wi', ...]
25
+ page.images #=> ['/LOGOS/2012/MOBY_DICK12-HP.JPG']
16
26
 
17
- $ gem install craft
27
+ class Script < Craft
28
+ one :body, 'text()'
29
+ end
18
30
 
19
- ## Usage
31
+ class Page < Craft
32
+ many :scripts, 'script', Script
33
+ end
20
34
 
21
- TODO: Write usage instructions here
22
-
23
- ## Contributing
24
-
25
- 1. Fork it
26
- 2. Create your feature branch (`git checkout -b my-new-feature`)
27
- 3. Commit your changes (`git commit -am 'Add some feature'`)
28
- 4. Push to the branch (`git push origin my-new-feature`)
29
- 5. Create new Pull Request
35
+ page = Page.parse open('http://www.google.com')
36
+ page.scripts[0].body #=> 'window.google=...'
37
+ ```
data/Rakefile CHANGED
@@ -1 +1,8 @@
1
- require "bundler/gem_tasks"
1
+ require 'bundler/gem_tasks'
2
+ require 'rake/testtask'
3
+
4
+ Rake::TestTask.new do |t|
5
+ t.pattern = "spec/*_spec.rb"
6
+ end
7
+
8
+ task :default => :test
@@ -4,16 +4,19 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
4
  require 'craft/version'
5
5
 
6
6
  Gem::Specification.new do |gem|
7
- gem.name = "craft"
7
+ gem.name = 'craft'
8
8
  gem.version = Craft::VERSION
9
- gem.authors = ["Ezekiel Templin", "Hakan Ensari"]
10
- gem.email = ["code@papercavalier.com"]
11
- gem.description = %q{Data extraction tool}
12
- gem.summary = %q{Data extraction tool}
13
- gem.homepage = "https://github.com/papercavalier/craft"
9
+ gem.authors = ['Ezekiel Templin', 'Hakan Ensari']
10
+ gem.email = ['code@papercavalier.com']
11
+ gem.description = %q{Craft XML into objects}
12
+ gem.summary = %q{Craft is a data extraction tool that crafts objects
13
+ out of HTML and XML.}
14
+ gem.homepage = 'http://papercavalier.com/craft/'
14
15
 
15
16
  gem.files = `git ls-files`.split($/)
16
17
  gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
17
18
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
18
- gem.require_paths = ["lib"]
19
+ gem.require_paths = ['lib']
20
+
21
+ gem.add_dependency 'nokogiri', '~> 1.5'
19
22
  end
@@ -1,5 +1,91 @@
1
- require "craft/version"
1
+ require 'craft/version'
2
+ require 'nokogiri'
2
3
 
3
- module Craft
4
- # Your code goes here...
4
+ # Craft objects out of HTML and XML.
5
+ #
6
+ # Examples
7
+ #
8
+ # module Transformations
9
+ # IntegerTransform = lambda { |n| Integer n.text }
10
+ # end
11
+ #
12
+ # class Person < Craft
13
+ # include Transformations
14
+ #
15
+ # one :name, 'div.name'
16
+ # one :age, 'div.age', IntegerTransform
17
+ # many :friends, 'li.friend', Person
18
+ # end
19
+ #
20
+ class Craft
21
+ class << self
22
+ # We alias call to new so that crafted objects may nest themselves or other
23
+ # crafted objects as transformations.
24
+ alias call new
25
+
26
+ # Define a method that extracts a collection of values from a parsed
27
+ # document.
28
+ #
29
+ # name - The Symbol name of the method.
30
+ # paths - One or more String XPath or CSS queries. An optional Proc
31
+ # transformation on the extracted value may be appended. If none is
32
+ # appended, the default transformation returns the stripped String
33
+ # value of the node.
34
+ #
35
+ # Returns an Array.
36
+ def many(name, *paths)
37
+ transform = pop_transformation paths
38
+
39
+ define_method name do
40
+ @node.search(*paths).map { |node| transform.call node }
41
+ end
42
+ end
43
+
44
+ # Define a method that extracts a single value from a parsed document.
45
+ #
46
+ # name - The Symbol name of the method.
47
+ # paths - One or more String XPath or CSS queries. An optional Proc
48
+ # transformation on the extracted value may be appended. If none is
49
+ # appended, the default transformation returns the stripped String
50
+ # value of the node.
51
+ #
52
+ # Returns an Object.
53
+ def one(name, *paths)
54
+ transform = pop_transformation paths
55
+
56
+ define_method name do
57
+ transform.call @node.at(*paths)
58
+ end
59
+ end
60
+
61
+ # Parse a document.
62
+ #
63
+ # body - A String HTML or XML document.
64
+ #
65
+ # Returns an instance of itself.
66
+ def parse(body)
67
+ new Nokogiri body
68
+ end
69
+
70
+ private
71
+
72
+ def pop_transformation(array)
73
+ if array.last.respond_to? :call
74
+ array.pop
75
+ else
76
+ Module.new do
77
+ def self.call(node)
78
+ node.text.strip if node
79
+ end
80
+ end
81
+ end
82
+ end
83
+ end
84
+
85
+ # Craft a new object.
86
+ #
87
+ # node - A Nokogiri::XML::Node.
88
+ def initialize(node)
89
+ @node = node
90
+ end
5
91
  end
@@ -1,3 +1,3 @@
1
- module Craft
2
- VERSION = "0.0.1"
1
+ class Craft
2
+ VERSION = '0.0.2'
3
3
  end
@@ -0,0 +1,58 @@
1
+ require 'bundler/setup'
2
+ require 'minitest/autorun'
3
+ require 'craft'
4
+
5
+ describe Craft do
6
+ let :html do
7
+ '<html><ul><li>1</li><li>2</li>'
8
+ end
9
+
10
+ let :klass do
11
+ Class.new Craft
12
+ end
13
+
14
+ let :instance do
15
+ klass.parse html
16
+ end
17
+
18
+ describe '.many' do
19
+ it 'extracts nodes' do
20
+ klass.many 'foo', 'li'
21
+ instance.foo.must_equal %w(1 2)
22
+ end
23
+
24
+ it 'transforms' do
25
+ klass.many 'foo', 'li', ->(node) { node.text.to_i }
26
+ instance.foo.must_equal [1, 2]
27
+ end
28
+ end
29
+
30
+ describe '.one' do
31
+ it 'extracts a node' do
32
+ klass.one 'foo', 'li'
33
+ instance.foo.must_equal '1'
34
+ end
35
+
36
+ it 'transforms' do
37
+ klass.one 'foo', 'li', ->(node) { node.text.to_i }
38
+ instance.foo.must_equal 1
39
+ end
40
+
41
+ describe 'given no matches' do
42
+ before do
43
+ klass.one 'foo', 'foo'
44
+ end
45
+
46
+ it 'returns nil' do
47
+ instance.foo.must_be_nil
48
+ end
49
+ end
50
+ end
51
+
52
+ it 'nests' do
53
+ nest = Class.new Craft
54
+ nest.many 'foo', 'li'
55
+ klass.one 'foo', 'ul', nest
56
+ instance.foo.foo.must_equal %w(1 2)
57
+ end
58
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: craft
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -10,9 +10,25 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2012-10-12 00:00:00.000000000 Z
14
- dependencies: []
15
- description: Data extraction tool
13
+ date: 2012-10-18 00:00:00.000000000 Z
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: nokogiri
17
+ requirement: !ruby/object:Gem::Requirement
18
+ none: false
19
+ requirements:
20
+ - - ~>
21
+ - !ruby/object:Gem::Version
22
+ version: '1.5'
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ none: false
27
+ requirements:
28
+ - - ~>
29
+ - !ruby/object:Gem::Version
30
+ version: '1.5'
31
+ description: Craft XML into objects
16
32
  email:
17
33
  - code@papercavalier.com
18
34
  executables: []
@@ -27,7 +43,8 @@ files:
27
43
  - craft.gemspec
28
44
  - lib/craft.rb
29
45
  - lib/craft/version.rb
30
- homepage: https://github.com/papercavalier/craft
46
+ - spec/craft_spec.rb
47
+ homepage: http://papercavalier.com/craft/
31
48
  licenses: []
32
49
  post_install_message:
33
50
  rdoc_options: []
@@ -39,17 +56,23 @@ required_ruby_version: !ruby/object:Gem::Requirement
39
56
  - - ! '>='
40
57
  - !ruby/object:Gem::Version
41
58
  version: '0'
59
+ segments:
60
+ - 0
61
+ hash: -3848600058118697198
42
62
  required_rubygems_version: !ruby/object:Gem::Requirement
43
63
  none: false
44
64
  requirements:
45
65
  - - ! '>='
46
66
  - !ruby/object:Gem::Version
47
67
  version: '0'
68
+ segments:
69
+ - 0
70
+ hash: -3848600058118697198
48
71
  requirements: []
49
72
  rubyforge_project:
50
73
  rubygems_version: 1.8.23
51
74
  signing_key:
52
75
  specification_version: 3
53
- summary: Data extraction tool
54
- test_files: []
55
- has_rdoc:
76
+ summary: Craft is a data extraction tool that crafts objects out of HTML and XML.
77
+ test_files:
78
+ - spec/craft_spec.rb