RubyGems - auto_excerpt - Versions diffs - 0.7.0 → 0.7.1 - Mend

auto_excerpt 0.7.0 → 0.7.1

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (11)

data/.gitignore CHANGED

@@ -1,5 +1 @@
-*.php
-*.zip
-extra
-*.gemspec
-pkg
+/extra/

data/CHANGELOG CHANGED

@@ -1,3 +1,5 @@
+== 0.7.1
+  * Added auto_excerpt command-line executable
 == 0.7.0 (2010-01-31)
   * Changed AutoExcerpt from a Class to a Module in order to return a String object when used
   * Removed String#clean

data/README.textile CHANGED

@@ -55,6 +55,11 @@ Default: false
 The number of [which] to skip at the beginning of the html when returned.
 Default: 0
+h3. Command Line
+*new: v0.7.1* Run @auto_excerpt@ from the command line with all of the options shown above!
+@$ auto_excerpt --words 10 "<p>Some html string</p>"@
 h2. Help out on Github!
 * Fork the project.

data/Rakefile CHANGED

@@ -30,7 +30,10 @@ end
 begin
   require 'yard'
-  YARD::Rake::YardocTask.new
+  YARD::Rake::YardocTask.new do |t|
+    t.files   = ['lib/**/*.rb', 'README.textile', 'CHANGELOG', 'LICENSE']
+    t.options = ['--any', '--extra', '--opts']
+  end
 rescue LoadError
   task :yard do
     abort "YARD is not available. In order to run yardoc, you must: sudo gem install yard"

data/VERSION CHANGED

@@ -1 +1 @@
-0.7.0
+0.7.1

data/bin/auto_excerpt ADDED

@@ -0,0 +1,59 @@
+#!/usr/bin/env ruby
+require "rubygems"
+require "auto_excerpt"
+require "optparse"
+def perr(msg)
+  $stderr.puts(msg)
+end
+def dashed(key)
+  key.to_s.gsub('_','-')
+end
+@options = {}
+options_parser = OptionParser.new do |o|
+  o.banner = "Usage: auto_excerpt [options] STRING|FILE"
+  o.separator ""
+  [
+    :characters,
+    :words,
+    :sentences,
+    :paragraphs,
+    :skip_words,
+    :skip_sentences,
+    :skip_paragraphs
+  ].each do |key|
+    o.on("--#{dashed(key)} N", Integer){ |n|  @options[key] = n }
+  end
+  [
+   :strip_html        ,
+   :strip_breaks_tabs ,
+   :strip_paragraphs
+  ].each do |key|
+    o.on("--#{dashed(key)}"){ |b| @options[key] = b }
+  end
+  o.on("--allowed_tags a,b,c", Array){|allowed| @options[:allowed_tags] = allowed }
+  o.on('--[no-]ending [STRING]'){ |s| @options[:ending] = s || nil }
+  o.on_tail('-h','--help'){ puts o; exit }
+end
+begin
+  options_parser.parse!(ARGV)
+  string_or_file = ARGV.last
+  raise(ArgumentError, "Please provide a STRING or FILE to parse.") unless string_or_file
+  string_or_file = File.read(string_or_file) if File.exist?(string_or_file)
+  puts AutoExcerpt.new(string_or_file, @options)
+rescue => e
+  perr("Error: #{e.message}\n")
+  perr(e.backtrace)
+  exit(1)
+end

data/lib/auto_excerpt.rb CHANGED

@@ -1,8 +1,12 @@
 require File.join(File.dirname(__FILE__), *%w[auto_excerpt parser])
 module AutoExcerpt
-  def self.new(text, options = {})
-    parser = Parser.new(text, options)
+  # @param [String] html A string of html.
+  # @param [Hash] options A hash of options
+  # @return [String]
+  # @see Parser#initialize List of options
+  def self.new(html, options = {})
+    parser = Parser.new(html, options)
     parser.parse
   end
 end

data/lib/auto_excerpt/parser.rb CHANGED

@@ -2,25 +2,26 @@ module AutoExcerpt
   # TODO allow for default options to be set.
   class Parser
     DEFAULTS = {
-       :characters => 0,
-       :words => 0,
-       :sentences => 0,
-       :paragraphs => 0,
+       :characters        => 0,
+       :words             => 0,
+       :sentences         => 0,
+       :paragraphs        => 0,
        # :skip_characters => 0,
-       :skip_words => 0,
-       :skip_sentences => 0,
-       :skip_paragraphs => 0,
-       :ending => '...',
-       :strip_html => false, :allowed_tags => [],
+       :skip_words        => 0,
+       :skip_sentences    => 0,
+       :skip_paragraphs   => 0,
+       :ending            => '...',
+       :strip_html        => false,
+       :allowed_tags      => [],
        :strip_breaks_tabs => false,
-       :strip_paragraphs => false
+       :strip_paragraphs  => false
     }
-    # TODO add and allowwed tags option
     PUNCTUATION_MARKS = /\!\s|\.\s|\?\s/
-    NO_CLOSE = %w( br hr img input ) # tags that do not have opposite closing tags
-    OPENING_TAG = /<([a-z0-9]{1,})\b[^>]*>/im
-    CLOSING_TAG = /<\/([a-z0-9]{1,})>/im
+    # tags that do not have opposite closing tags
+    NO_CLOSE          = %w( br hr img input )
+    OPENING_TAG       = /<([a-z0-9]{1,})\b[^>]*>/im
+    CLOSING_TAG       = /<\/([a-z0-9]{1,})>/im
     # @param [String] text The text to be excerpted
     # @param [Hash] settings The settings for creating the excerpt
@@ -36,10 +37,11 @@ module AutoExcerpt
     # @option settings [Boolean] :strip_html (false) Strip all HTML from the text before creating the excerpt
     # @option settings [Boolean] :strip_paragraphs (false) Strip all <p> tags from the HTML before creating the excerpt
     def initialize(text, settings = {})
+      # undo this and change how settings are stored
       @settings = Marshal.load(Marshal.dump(DEFAULTS)).merge(settings)
       # make our copy
-      @body = text.dup.strip
+      @body    = text.dup.strip
       @excerpt = ""
       if @settings[:strip_html]
@@ -49,10 +51,10 @@ module AutoExcerpt
       @body = clean(@body) if @settings[:strip_breaks_tabs]
       # TODO replace this with better regex
       @body.replace(@body.gsub(/<(\/|)p>/,'')) if @settings[:strip_paragraphs]
-      @charcount = strip_html(@body).length
-      @wordcount = strip_html(@body).scan(/\w+/).size
-      @sencount  = @body.split(PUNCTUATION_MARKS).size
-      @pghcount  = @body.split("</p>").size
+      @charcount             = strip_html(@body).length
+      @wordcount             = strip_html(@body).scan(/\w+/).size
+      @sencount              = @body.split(PUNCTUATION_MARKS).size
+      @pghcount              = @body.split("</p>").size
       @settings[:characters] = 150 if @settings.values_at(:characters, :words, :sentences, :paragraphs).all?{|val| val.zero? || val.nil?  }
     end
@@ -66,12 +68,14 @@ module AutoExcerpt
     alias_method :parse, :create_excerpt
     protected
+    # @api private
     attr_reader :charcount, :wordcount, :sencount, :pghcount
+    # @api private
     attr_accessor :settings, :body, :excerpt
-   # close html tags
-   # TODO make this work with new strip_html method. Improve regex
+    # close html tags
     def close_tags(text)
       # Don't bother closing tags if html is stripped since there are no tags.
       if @settings[:strip_html] && @settings[:allowed_tags].empty?
@@ -96,7 +100,7 @@ module AutoExcerpt
         end
       end
-      @excerpt = [text, @settings[:ending], tagstoclose].compact.join
+      @excerpt = [text, @settings[:ending], tagstoclose].join
     end
     def non_excerpted_text
@@ -174,4 +178,4 @@ module AutoExcerpt
        @stripped_html = html.gsub(reg,'')
     end
   end
-end
+end

data/spec/auto_excerpt_spec.rb CHANGED

@@ -4,6 +4,14 @@ require File.join(File.dirname(__FILE__), *%w[shared strip_html_spec])
 # I definitely need more tests
 describe AutoExcerpt do
+  it { should respond_to(:new) }
+  it "should return a string" do
+    AutoExcerpt.new("foo bar").should be_instance_of(String)
+  end
+end
+describe AutoExcerpt::Parser do
   it "should limit characters" do
    text = html_excerpt({:characters => 5, :ending => nil})
    stripped_text(text).length.should eql(5)
@@ -18,13 +26,13 @@ describe AutoExcerpt do
   end
   it "does not include html tags or entities in character count" do
-    AutoExcerpt.new("<h1>Hello World!</h1>", {:characters => 5, :ending => nil}).should == "<h1>Hello</h1>"
-    AutoExcerpt.new("<h1>Copyright &copy; 2010</h1>", {:characters => 11, :ending => nil}).should == "<h1>Copyright &copy;</h1>"
+    AutoExcerpt::Parser.new("<h1>Hello World!</h1>", {:characters => 5, :ending => nil}).parse.should == "<h1>Hello</h1>"
+    AutoExcerpt::Parser.new("<h1>Copyright &copy; 2010</h1>", {:characters => 11, :ending => nil}).parse.should == "<h1>Copyright &copy;</h1>"
   end
   it "should not cutoff in the middle of a word" do
-    AutoExcerpt.new("<h1>Hello World!</h1>", {:characters => 4, :ending => nil}).should == "<h1>Hello</h1>"
-    AutoExcerpt.new("<h1>Hello World!</h1>", {:characters => 7, :ending => nil}).should == "<h1>Hello World</h1>"
+    AutoExcerpt::Parser.new("<h1>Hello World!</h1>", {:characters => 4, :ending => nil}).parse.should == "<h1>Hello</h1>"
+    AutoExcerpt::Parser.new("<h1>Hello World!</h1>", {:characters => 7, :ending => nil}).parse.should == "<h1>Hello World</h1>"
   end
   it "should limit words" do
@@ -64,17 +72,17 @@ describe AutoExcerpt do
      <br />crap<b>dddd
      <a href="/activity/read_and_frwd/1251?type=comment">(Open)</a>
    }
-   text = AutoExcerpt.new(t,{:characters => 270})
+   text = AutoExcerpt::Parser.new(t,{:characters => 270}).parse
    text.match(/(<(\/|)b>)/).captures.length.should eql(2)
   end
-end
-describe AutoExcerpt, "when stripping HTML" do
+  describe "when stripping HTML" do
-  it_should_behave_like "an HTML stripper"
+    it_should_behave_like "an HTML stripper"
-  it "should not strip P tags if :paragraphs option is set" do
-    AutoExcerpt.new("<p>this is a paragraph.</p><p>this is also a paragraph.</p>",{:paragraphs => 1, :strip_html => true}).should eql("<p>this is a paragraph.</p>")
-  end
-end
+    it "should not strip P tags if :paragraphs option is set" do
+      AutoExcerpt::Parser.new("<p>this is a paragraph.</p><p>this is also a paragraph.</p>",{:paragraphs => 1, :strip_html => true}).parse.should eql("<p>this is a paragraph.</p>")
+    end
+  end
+end

data/spec/spec_helper.rb CHANGED

@@ -5,7 +5,8 @@ require "webrick/htmlutils"
 Object.class_eval do
   alias_method :old_pp, :pp
+  # so I can inspect the html in TextMate when things get hard to interpret
   def pp(str)
     str = WEBrick::HTMLUtils.escape(str) if str.is_a?(String)
     old_pp(str)
@@ -18,23 +19,21 @@ module AutoExcerptHelpers
   def html_excerpt(opts = {})
-   AutoExcerpt.new(HTML_BLOCK, opts)
+   AutoExcerpt::Parser.new(HTML_BLOCK, opts).parse
   end
   def normal_excerpt(opts = {})
-   AutoExcerpt.new(NORMAL_TEXT, opts)
+   AutoExcerpt::Parser.new(NORMAL_TEXT, opts).parse
   end
   def heavy_excerpt(opts = {})
-   AutoExcerpt.new(HEAVY_HTML_BLOCK, opts)
+   AutoExcerpt::Parser.new(HEAVY_HTML_BLOCK, opts).parse
   end
   def stripped_text(t)
    t.gsub(/<[^>]*(>+|\s*\z)/m, "")
   end
-  CRAP_HTML = ""
   NORMAL_TEXT = %{Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
     Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: auto_excerpt
 version: !ruby/object:Gem::Version
-  version: 0.7.0
+  version: 0.7.1
 platform: ruby
 authors:
 - Kabari Hendrick
@@ -9,8 +9,8 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2010-01-31 00:00:00 -06:00
-default_executable:
+date: 2010-02-23 00:00:00 -06:00
+default_executable: auto_excerpt
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rspec
@@ -34,8 +34,8 @@ dependencies:
     version:
 description: Create excerpts from html formatted text. HTML tags are automatically closed.
 email: kabari@gmail.com
-executables: []
+executables:
+- auto_excerpt
 extensions: []
 extra_rdoc_files:
@@ -48,6 +48,7 @@ files:
 - README.textile
 - Rakefile
 - VERSION
+- bin/auto_excerpt
 - browser_test/browser_test.rb
 - lib/auto_excerpt.rb
 - lib/auto_excerpt/parser.rb