cocoa-xml 0.4.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,6 @@
1
+ === 0.4.3 / 2010-01-26
2
+
3
+ * 1 major enhancement
4
+
5
+ * Birthday!
6
+
@@ -0,0 +1,20 @@
1
+ History.rdoc
2
+ Manifest.txt
3
+ README.rdoc
4
+ Rakefile
5
+ lib/cocoa-xml.rb
6
+ lib/cocoa-xml/nodeset.rb
7
+ lib/cocoa-xml/version.rb
8
+ lib/cocoa-xml/nsxmlnode_extras.rb
9
+ lib/cocoa-xml/nsxmldocument_extras.rb
10
+ lib/nokogiri/css.rb
11
+ lib/nokogiri/syntax_error.rb
12
+ lib/nokogiri/css/generated_parser.rb
13
+ lib/nokogiri/css/generated_tokenizer.rb
14
+ lib/nokogiri/css/node.rb
15
+ lib/nokogiri/css/parser.rb
16
+ lib/nokogiri/css/parser.y
17
+ lib/nokogiri/css/syntax_error.rb
18
+ lib/nokogiri/css/tokenizer.rb
19
+ lib/nokogiri/css/tokenizer.rex
20
+ lib/nokogiri/css/xpath_visitor.rb
@@ -0,0 +1,105 @@
1
+ = Cocoa-XML
2
+
3
+ * http://github.com/cehoffman/cocoa-xml
4
+ * http://cehoffman.github.com/cocoa-xml
5
+
6
+ == DESCRIPTION:
7
+
8
+ Cocoa-xml provides a more Ruby-like interface to Cocoa's NSXMLDocument
9
+ and classes that inherit from NSXMLNode. It provides access to XPath,
10
+ XQuery, and CSS selectors for searching documents.
11
+
12
+ == FEATURES/PROBLEMS:
13
+
14
+ * Cocoa-xml uses native Cocoa xml processing facilities.
15
+ * CSS, XPath, and XQuery can be used to search a document
16
+ * Broken HTML documents can be processed thanks to NSXMLDocument
17
+
18
+ == SYNOPSIS:
19
+
20
+ require 'cocoa-xml'
21
+
22
+ doc = CocoaXML::HTML("http://www.google.com/search?q=cehoffman")
23
+
24
+ doc.css('h3.r a.l').each do |link|
25
+ puts link.text
26
+ end
27
+
28
+ doc.xpath('//h3/a[@class="l"]').each do |link|
29
+ puts link.text
30
+ end
31
+
32
+ doc.xquery('data(//h3/a[@class="l"])').each do |link|
33
+ puts link
34
+ end
35
+
36
+ == REQUIREMENTS:
37
+
38
+ * MacRuby 0.6 (development version)
39
+ * OS X Snow Leopard (only version supported by MacRuby currently)
40
+
41
+ == INSTALL:
42
+
43
+ $ sudo gem install cocoa-xml
44
+
45
+ == DEVELOPERS:
46
+
47
+ It is advised to use the current development version of {MacRuby}[http://www.macruby.com] from
48
+ the source tree.
49
+
50
+ == CREDITS:
51
+
52
+ CSS selector support is taken from Nokogiri.
53
+
54
+ == LICENSE:
55
+
56
+ (The MIT License)
57
+
58
+ Copyright (c) 2010 Chris Hoffman
59
+
60
+ Permission is hereby granted, free of charge, to any person obtaining
61
+ a copy of this software and associated documentation files (the
62
+ 'Software'), to deal in the Software without restriction, including
63
+ without limitation the rights to use, copy, modify, merge, publish,
64
+ distribute, sublicense, and/or sell copies of the Software, and to
65
+ permit persons to whom the Software is furnished to do so, subject to
66
+ the following conditions:
67
+
68
+ The above copyright notice and this permission notice shall be
69
+ included in all copies or substantial portions of the Software.
70
+
71
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
72
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
73
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
74
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
75
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
76
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
77
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
78
+
79
+ === Nokogiri License
80
+
81
+ (The MIT License)
82
+
83
+ Copyright (c) 2008 - 2009:
84
+
85
+ * {Aaron Patterson}[http://tenderlovemaking.com]
86
+ * {Mike Dalessio}[http://mike.daless.io]
87
+
88
+ Permission is hereby granted, free of charge, to any person obtaining
89
+ a copy of this software and associated documentation files (the
90
+ 'Software'), to deal in the Software without restriction, including
91
+ without limitation the rights to use, copy, modify, merge, publish,
92
+ distribute, sublicense, and/or sell copies of the Software, and to
93
+ permit persons to whom the Software is furnished to do so, subject to
94
+ the following conditions:
95
+
96
+ The above copyright notice and this permission notice shall be
97
+ included in all copies or substantial portions of the Software.
98
+
99
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
100
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
101
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
102
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
103
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
104
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
105
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,23 @@
1
+ # -*- ruby -*-
2
+
3
+ require 'rubygems'
4
+ require 'hoe'
5
+ require "./lib/cocoa-xml/version.rb"
6
+
7
# Hoe plugins activated for this project's rake tasks.
Hoe.plugin :gemcutter
Hoe.plugin :clean
Hoe.plugin :git
Hoe.plugin :yard

# Gem specification for cocoa-xml.
Hoe.spec 'cocoa-xml' do
  # The version string is the constant defined in lib/cocoa-xml/version.rb,
  # which is required at the top of this file.
  self.version = ::CocoaXML::Version
  developer('Chris Hoffman', 'cehoffman@gmail.com')

  self.rubyforge_name = 'cocoa-xml'

  # Documentation settings; presumably consumed by the :yard plugin
  # activated above -- confirm against the hoe-yard documentation.
  self.yard_title = "Cocoa-XML"
  self.yard_markup = 'rdoc'
  self.yard_opts = ['--no-private']
end
22
+
23
+ # vim: syntax=ruby
@@ -0,0 +1,48 @@
1
+ framework 'Cocoa'
2
+
3
+ require "cocoa-xml/version"
4
+ require "cocoa-xml/nodeset"
5
+ require "cocoa-xml/nsxmldocument_extras"
6
+ require "cocoa-xml/nsxmlnode_extras"
7
+ require "nokogiri/css"
8
+
9
module CocoaXML
  # Parse an input HTML source.
  #
  # @param [url, NSURL, #read, #to_str] source a url as a string or NSURL,
  #   an object that responds to #read, or one that responds to #to_str
  # @return [NSXMLDocument] an NSXMLDocument set to interpret source as HTML
  def self.HTML(source)
    parse(source, NSXMLDocumentTidyHTML)
  end

  # Parse an input XML source.
  #
  # @param [url, NSURL, #read, #to_str] source a url as a string or NSURL,
  #   an object that responds to #read, or one that responds to #to_str
  # @return [NSXMLDocument] an NSXMLDocument set to interpret source as XML
  def self.XML(source)
    parse(source, NSXMLDocumentTidyXML)
  end

  # Parse an HTML or XML source.
  #
  # Shared implementation behind {HTML} and {XML}. It is an internal helper:
  # a bare +private+ keyword does not apply to +def self.+ methods, so the
  # method stays callable for backward compatibility and is only hidden from
  # the generated documentation.
  #
  # Source resolution:
  # * an NSURL is loaded as-is;
  # * anything responding to #to_str is first handed to NSURL.URLWithString;
  #   when that yields nil the text itself is parsed as markup;
  # * anything responding to #read is read and its content parsed as markup.
  #
  # @private
  # @param [url, NSURL, #read, #to_str] source a url as a string or NSURL,
  #   an object that responds to #read, or one that responds to #to_str
  # @param [Number] type constant determining how to interpret the input
  #   source, xml or html
  # @return [NSXMLDocument]
  def self.parse(source, type)
    # NOTE: the NSError written here is currently not reported to the caller.
    error = Pointer.new(:object)

    # The NSURL check must not depend on #to_str: previously a trailing
    # `if source.respond_to?(:to_str)` guarded the whole assignment, so an
    # NSURL argument never became the url and was parsed as an XML string.
    url = if source.is_a?(NSURL)
            source
          elsif source.respond_to?(:to_str)
            NSURL.URLWithString(source.to_str)
          end
    source = source.read if source.respond_to?(:read)

    if url
      ::NSXMLDocument.alloc.initWithContentsOfURL(url, options: type, error: error)
    else
      ::NSXMLDocument.alloc.initWithXMLString(source, options: type, error: error)
    end
  end
end
@@ -0,0 +1,57 @@
1
module CocoaXML
  # An Array of search results. Members are usually nodes, but XQuery
  # results may also contain plain values such as strings.
  class NodeSet < Array
    # Collect all the texts of nodes in set and join together
    #
    # @return [String] single string containing text of each element
    def text
      map { |node| (node.respond_to?(:to_str) && node.to_str) || node.text }.flatten.join
    end
    alias :inner_text :text

    # Collect all the #to_s representations of elements in array
    #
    # @return [String] single string containing #to_s of each element
    def to_s
      map { |node| node.to_s }.join
    end

    # Perform selector on each element in set
    #
    # @param [String] selector css selector to use on each node
    # @return [NodeSet<NSXMLNode, String>] new set resulting from
    #   performing selector on each node in set
    def css(selector)
      # A selector group ("h1, h2") yields one XPath per selector; they must
      # be combined with the XPath union operator, not concatenated.
      xpath(::Nokogiri::CSS.xpath_for(selector, :prefix => './/').join(' | '))
    end

    # Perform XQuery on each node in set
    #
    # Members that do not respond to #xquery (for example strings produced
    # by an earlier query) are skipped.
    #
    # @param [String] query xquery to perform on each node in set
    # @return [NodeSet<NSXMLNode, String>] results of performing
    #   query
    def xquery(query)
      search_each(:xquery, query)
    end

    # Perform XPath selection on each node in set
    #
    # Members that do not respond to #xpath (for example strings produced
    # by an earlier query) are skipped.
    #
    # @param [String] query xpath to follow on each node
    # @return [NodeSet<NSXMLNode>] results of path on each node
    def xpath(query)
      search_each(:xpath, query)
    end

    # Get the value of the attribute for each node in set.
    #
    # An Integer or Range argument is treated as a regular Array index so
    # that positional access (+set[0]+, +set[1..2]+) keeps working.
    #
    # @param [String, Integer, Range] attr attribute to search for on each
    #   node, or an index/range into the set
    # @return [Array<String, nil>, Object] value of attribute for each node,
    #   or the indexed member(s)
    def [](attr)
      return super if attr.is_a?(Integer) || attr.is_a?(Range)

      map { |node| node[attr] }.flatten
    end

    private

    # Run +meth+ (:xpath or :xquery) with +query+ on every member able to
    # perform it and gather the results into a new NodeSet.
    def search_each(meth, query)
      # Root searches to start from nodes. Use the non-destructive #sub so
      # the caller's string is left untouched (and frozen strings work).
      relative = query.to_s.sub(%r{\A//}, '')
      nodes = select { |node| node.respond_to?(meth) }
      # A node search may return nil on error; drop those entries.
      NodeSet.new(nodes.map { |node| node.send(meth, relative) }.flatten.compact)
    end
  end
end
@@ -0,0 +1,48 @@
1
module CocoaXML
  # Convenience methods mixed into NSXMLDocument.
  module NSXMLDocumentExtras
    # Installs the Ruby-style encoding accessors on the including class as
    # aliases of the Cocoa accessors.
    #
    # @private
    def self.included(klass)
      klass.class_eval do
        alias_method :encoding, :characterEncoding
        alias_method :encoding=, :setCharacterEncoding
      end
    end

    # Encoding of document
    #
    # Documentation placeholder; the including class receives an alias to
    # characterEncoding instead.
    #
    # @see http://www.iana.org/assignments/character-sets Valid Encoding Specifiers
    #
    # @return [NSString] encoding of document
    def encoding
    end

    # Set encoding of document
    #
    # Documentation placeholder; the including class receives an alias to
    # setCharacterEncoding instead.
    #
    # @see file:///Developer/Documentation/DocSets/com.apple.adc.documentation.AppleSnowLeopard.CoreReference.docset/Contents/Resources/Documents/documentation/Cocoa/Reference/Foundation/Classes/NSXMLDocument_Class/Reference/Reference.html#//apple_ref/occ/instm/NSXMLDocument/setCharacterEncoding: Developer Documentation
    # @see http://developer.apple.com/mac/library/documentation/Cocoa/Reference/Foundation/Classes/NSXMLDocument_Class/NSXMLDocument_Class.pdf Apple NSXMLDocument PDF
    #
    # @param [NSString] enc valid character encoding
    # @return [void]
    def encoding=(enc)
    end

    # @return [Boolean] whether the document's content kind is HTML,
    #   e.g. <br> style tags on output
    def html?
      kind = documentContentKind
      kind == NSXMLDocumentHTMLKind
    end

    # @return [Boolean] whether the document's content kind is XHTML,
    #   e.g. <br/> style tags on output
    def xhtml?
      kind = documentContentKind
      kind == NSXMLDocumentXHTMLKind
    end

    # @return [Boolean] whether the document's content kind is XML
    def xml?
      kind = documentContentKind
      kind == NSXMLDocumentXMLKind
    end
  end
end
47
+
48
+ ::NSXMLDocument.send :include, CocoaXML::NSXMLDocumentExtras
@@ -0,0 +1,106 @@
1
module CocoaXML
  # These are a set of methods added onto the Cocoa NSXMLNode class and children.
  module NSXMLNodeExtras
    # Search from this node down using a css selector
    #
    # @param [String] selector selector used to select nodes from document
    # @return [NodeSet<NSXMLNode>] array of nodes matched by selector
    def css(selector)
      # A selector group ("h1, h2") yields one XPath per selector; they must
      # be combined with the XPath union operator, not concatenated.
      xpath(::Nokogiri::CSS.xpath_for(selector, :prefix => './/').join(' | '))
    end

    # Search document using provided path
    #
    # @param [String] path path used to select nodes from document
    # @return [NodeSet<NSXMLNode>] array of nodes matched by path; empty
    #   when the search reports an error
    def xpath(path)
      error = Pointer.new(:object)
      results = nodesForXPath(path, error: error)

      # TODO: Do something with the error
      return NodeSet.new unless error[0].nil? && results

      NodeSet.new(results)
    end

    # Process document using provided query
    #
    # @param [String] query query used process information in document
    # @return [NodeSet<NSXMLNode, String>] results depends on
    #   query. Notice that unlike {#xpath} basic types can also be returned.
    #   Empty when the query reports an error.
    def xquery(query)
      error = Pointer.new(:object)
      results = objectsForXQuery(query, error: error)

      # TODO: Do something with the error
      return NodeSet.new unless error[0].nil? && results

      NodeSet.new(results)
    end

    # Get the contained text from this node and children nodes
    #
    # @return [String] text contents of node
    def text
      xquery('data(.)').join
    end
    alias :inner_text :text

    # Get the value of an attribute of node
    #
    # @param [String] attr attribute of node to query
    # @return [String, nil] string value of attribute or nil if no attribute
    def [](attr)
      xquery("data(@#{attr})").last
    end

    # Set the value of an attribute of node
    #
    # Updates the attribute in place when it already exists, otherwise adds
    # a new attribute node. The branches are exclusive: the former
    # `a && b || c` form also ran the "add" branch whenever the setter
    # returned nil.
    #
    # @param [String] attr attribute of node to set
    # @param [#to_s] value value to set attribute to
    # @return [#to_s] the value passed in
    def []=(attr, value)
      name = attr.to_s
      string = value.to_s
      existing = attributeForName(name)

      if existing
        existing.setStringValue(string)
      else
        addAttribute(::NSXMLNode.attributeWithName(name, stringValue: string))
      end
      value
    end

    # @private
    def attribute(attr)
      attributeForName(attr.to_s)
    end

    # @return [String] the xml of this node including children nodes with proper indentation
    def to_s
      XMLStringWithOptions(NSXMLNodePrettyPrint)
    end

    # Installs Ruby-style aliases for Cocoa methods on the including class
    # and wraps #children so that it returns a NodeSet.
    #
    # @private
    def self.included(klass)
      klass.class_eval do
        alias :remove :detach
        alias :unlink :detach
        alias :path :XPath

        # Guard against a second include: re-aliasing would point
        # old_children at the wrapper below and recurse forever.
        unless method_defined?(:old_children)
          # TODO: Why is this not working
          alias :old_children :children
          def children
            NodeSet.new(old_children)
          end
        end
      end
    end

    # Remove this node from its parent
    #
    # @return [self] this node
    def remove
      # Implemented as an alias to detach
    end
    alias :unlink :remove

    # An XPath formula to reach this node
    #
    # @return [String] XPath to this node
    def path
      # Implemented as an alias to :XPath
    end
  end
end
105
+
106
+ ::NSXMLNode.send :include, CocoaXML::NSXMLNodeExtras
@@ -0,0 +1,3 @@
1
module CocoaXML
  # Release version of the cocoa-xml gem.
  Version = "0.4.4"
end
@@ -0,0 +1,27 @@
1
+ require 'nokogiri/css/node'
2
+ require 'nokogiri/css/xpath_visitor'
3
+ require 'nokogiri/css/generated_parser'
4
+ require 'nokogiri/css/generated_tokenizer'
5
+ require 'nokogiri/css/tokenizer'
6
+ require 'nokogiri/css/parser'
7
+ require 'nokogiri/css/syntax_error'
8
+
9
module Nokogiri
  # Modules to convert CSS selectors to valid XPath
  # @see http://nokogiri.org/Nokogiri/CSS.html Nokogiri Documentation
  module CSS
    class << self
      # Parse the CSS selector given in +selector+.
      #
      # @param [String] selector css selector to parse
      # @return the AST produced by the parser
      def parse(selector)
        Parser.new.parse(selector)
      end

      # Get the XPath for +selector+.
      #
      # @param [String] selector css selector to convert
      # @param [Hash] options passed through to the parser's xpath_for;
      #   the :ns entry (default: empty hash) is also given to Parser.new
      # @return the parser's xpath_for result
      def xpath_for(selector, options = {})
        namespaces = options[:ns] || {}
        Parser.new(namespaces).xpath_for(selector, options)
      end
    end
  end
end