RubyGems - webtagger - Versions diffs - 0.1.0 - Mend

webtagger 0.1.0

Files changed (12) hide show

data/.document ADDED

@@ -0,0 +1,5 @@
+README.rdoc
+lib/**/*.rb
+bin/*
+features/**/*.feature
+LICENSE

data/.gitignore ADDED

@@ -0,0 +1,21 @@
+## MAC OS
+.DS_Store
+## TEXTMATE
+*.tmproj
+tmtags
+## EMACS
+*~
+\#*
+.\#*
+## VIM
+*.swp
+## PROJECT::GENERAL
+coverage
+rdoc
+pkg
+## PROJECT::SPECIFIC

data/LICENSE ADDED

@@ -0,0 +1,20 @@
+Copyright (c) 2009 lfborjas
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

data/README.rdoc ADDED

@@ -0,0 +1,49 @@
+= webtagger
+Webtagger is a simple ruby gem that uses the web intelligence to extract important terms in texts, suitable for tagging them, finding the main subject or automatically building queries.
+It depends on {httparty}[http://github.com/jnunemaker/httparty] and uses the following external APIs:
+* {Yahoo term extraction}[http://developer.yahoo.com/search/content/V1/termExtraction.html]
+* {Tag-the-net}[http://tagthe.net]
+* {Alchemy API}[http://www.alchemyapi.com/api/keyword/textc.html]
+And it's written to support any API in the future.
+==Installation
+    gem install webtagger
+==Usage
+Ok, little caveat here, you might need an API-key for some of the services, so you might want to run
+    webtagger --configure
+To get instructions on how to get and save the API keys. Or, you can pass them in the tagging method, like this
+    tags = WebTagger.tag(text, service="yahoo", token="YOUR-API-KEY")
+Besides that pickle, the standard usage is really simple:
+    require 'webtagger'
+    text = "Hi, I'm text"
+    #you can use the default service (tagthe)
+    tags = WebTagger.tag(text)
+    #or choose whichever you want, if it isn't supported, falls back to the default, so you don't have
+    #to be on the look for exceptions
+    tags = WebTagger.tag(text,"yahoo")
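+If something funny happens when calling an API, a `WebTagger::WebTaggerError` will be raised; the exception instance has a `response` attribute that holds the original error response. For example:
+    begin
+      tags = WebTagger.tag(text, "yahoo", "NOT-A-VALID-KEY")
+    rescue WebTagger::WebTaggerError => e
+      puts e.response.inspect
+    end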
+If a http error happens (404, 500, etc), `nil` will be returned.
+== Note on Patches/Pull Requests
+* Fork the project.
+* Make your feature addition or bug fix.
+* Add tests for it. This is important so I don't break it in a
+  future version unintentionally.
+* Commit, do not mess with rakefile, version, or history.
+  (if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
+* Send me a pull request. Bonus points for topic branches.
+== Copyright
+Copyright (c) 2010 lfborjas. See LICENSE for details.

data/Rakefile ADDED

@@ -0,0 +1,55 @@
+require 'rubygems'
+require 'rake'
+begin
+  require 'jeweler'
+  Jeweler::Tasks.new do |gem|
+    gem.name = "webtagger"
+    gem.summary = %Q{Use some popular web services to extract keywords from text}
+    gem.description = %Q{Use webtagger to hace easy access to keyword extraction web services (tagthe.net, yahoo and alchemy)}
+    gem.email = "me@lfborjas.com"
+    gem.homepage = "http://github.com/lfborjas/webtagger"
+    gem.authors = ["lfborjas"]
+    gem.add_development_dependency "thoughtbot-shoulda", ">= 0"
+    gem.add_dependency "httparty", "0.6.1"
+    gem.executables << 'webtagger'
+    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
+  end
+  Jeweler::GemcutterTasks.new
+rescue LoadError
+  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
+end
+require 'rake/testtask'
+Rake::TestTask.new(:test) do |test|
+  test.libs << 'lib' << 'test'
+  test.pattern = 'test/**/test_*.rb'
+  test.verbose = true
+end
+begin
+  require 'rcov/rcovtask'
+  Rcov::RcovTask.new do |test|
+    test.libs << 'test'
+    test.pattern = 'test/**/test_*.rb'
+    test.verbose = true
+  end
+rescue LoadError
+  task :rcov do
+    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
+  end
+end
+task :test => :check_dependencies
+task :default => :test
+require 'rake/rdoctask'
+Rake::RDocTask.new do |rdoc|
+  version = File.exist?('VERSION') ? File.read('VERSION') : ""
+  rdoc.rdoc_dir = 'rdoc'
+  rdoc.title = "webtagger #{version}"
+  rdoc.rdoc_files.include('README*')
+  rdoc.rdoc_files.include('lib/**/*.rb')
+end

data/VERSION ADDED

@@ -0,0 +1 @@
+0.1.0

data/bin/webtagger ADDED

@@ -0,0 +1,60 @@
+#!/usr/bin/env ruby
+require 'optparse'
+require 'fileutils'
+$:.unshift File.dirname(__FILE__) + "/../lib"
+require 'webtagger'
+service = ""
+def configure
+    WebTagger::SERVICES.each do |service|
+        next if service == "tagthe"
+        conf = File.join(ENV['HOME'], '.webtagger')
+        FileUtils.touch(conf) unless File.exist? conf
+        srvcs = {}
+        File.open(conf).each do |service_conf|
+            s, t = service_conf.split(/\s*=\s*/) rescue next
+            srvcs[s.strip.downcase] = t ? t.strip : ""
+        end
+        puts "Token for #{service.downcase} (leave blank if you don't want to set it now or you already did): "
+        token = gets
+        srvcs[service]= (token and not token.strip.empty?) ? token : srvcs[service] || ""
+        File.open(conf,'w') do |new_conf|
+            srvcs.each do |s, t|
+                new_conf.write("#{s.upcase}=#{t.strip}\n")
+            end
+        end
+    end
+end
+OptionParser.new do |opt|
+    opt.banner = "usage: webtagger [OPTIONS] [text]"
+    opt.on('-c', '--configure', String, "Add tokens for each service") do
+        configure()
+        exit
+    end
+    opt.on('-t', '--token=[service]', String, "Get the token of a specific service (or all if not specified)") do |s|
+        s="all" if not s or s.empty?
+        puts WebTagger.get_token(s)
+        exit
+    end
+    opt.on('-s', '--service=[service]', String, "Tag the text with the specified service (defaults to tagthe)") do |s|
+        s="" unless WebTagger::SERVICES.include?(s)
+        service = s
+    end
+    opt.on('-h', '--help', "Display the help screen and exit") do
+        puts opt
+        exit
+    end
+end.parse!
+#do the actual tagging:
+text = ARGV[0]
+if text and not text.empty?
+    puts "tags: %s"%WebTagger.tag(text, service).inspect[1..-2] rescue puts "Couldn't extract tags"
+else
+    puts "You must supply some text to tag!"
+end

data/lib/httparty_icebox.rb ADDED

@@ -0,0 +1,263 @@
+# = Icebox : Caching for HTTParty
+#
+# Cache responses in HTTParty models [http://github.com/jnunemaker/httparty]
+#
+# === Usage
+#
+#   class Foo
+#     include HTTParty
+#     include HTTParty::Icebox
+#     cache :store => 'file', :timeout => 600, :location => MY_APP_ROOT.join('tmp', 'cache')
+#   end
+#
+# Modeled after Martyn Loughran's APICache [http://github.com/newbamboo/api_cache]
+# and Ruby On Rails's caching [http://api.rubyonrails.org/classes/ActiveSupport/Cache.html]
+#
+# Author: Karel Minarik [www.karmi.cz]
+#
+# === Notes
+#
+# Thanks to Amit Chakradeo for pointing out response objects have to be stored marhalled on FS
+# Thanks to Marlin Forbes for pointing out the query parameters have to be included in the cache key
+#
+#
+require 'logger'
+begin
+  require 'ftools' # Ruby 1.8 only: this library is no longer shipped with Ruby 1.9 and later
+rescue LoadError
+  # Not available on this Ruby; FileUtils (required below) provides mkdir_p.
+end
+require 'fileutils'
+require 'tmpdir'
+require 'pathname'
+require 'digest/md5'
+module HTTParty #:nodoc:
+  # == Caching for HTTParty
+  # See documentation in HTTParty::Icebox::ClassMethods.cache
+  #
+  module Icebox
+    module ClassMethods
+      # Enable caching and set cache options
+      # Returns memoized cache object
+      #
+      # Following options are available, default values are in []:
+      #
+      # +store+::       Storage mechanism for cached data (memory, filesystem, your own) [memory]
+      # +timeout+::     Cache expiration in seconds [60]
+      # +logger+::      Path to logfile or logger instance [nil, silent]
+      #
+      # Any additional options are passed to the Cache constructor
+      #
+      # Usage:
+      #
+      #   # Enable caching in HTTParty, in memory, for 1 minute
+      #   cache # Use default values
+      #
+      #   # Enable caching in HTTParty, on filesystem (/tmp), for 10 minutes
+      #   cache :store => 'file', :timeout => 600, :location => '/tmp/'
+      #
+      #   # Use your own cache store (see +AbstractStore+ class below)
+      #   cache :store => 'memcached', :timeout => 600, :server => '192.168.1.1:1001'
+      #
+      def cache(options={})
+        options[:store]   ||= 'memory'
+        options[:timeout] ||= 60
+        logger = options[:logger]
+        @cache ||= Cache.new( options.delete(:store), options )
+      end
+    end
+    # When included, extend class with +cache+ method
+    # and redefine +get+ method to use cache
+    #
+    def self.included(receiver) #:nodoc:
+      receiver.extend ClassMethods
+      receiver.class_eval do
+        # Get reponse from network
+        #
+        # TODO: Why alias :new :old is not working here? Returns NoMethodError
+        #
+        def self.get_without_caching(path, options={})
+          perform_request Net::HTTP::Get, path, options
+        end
+        # Get response from cache, if available
+        #
+        def self.get_with_caching(path, options={})
+          key = path
+          key << options[:query].to_s if defined? options[:query]
+          if cache.exists?(key) and not cache.stale?(key)
+            Cache.logger.debug "CACHE -- GET #{path}#{options[:query]}"
+            return cache.get(key)
+          else
+            Cache.logger.debug "/!\\ NETWORK -- GET #{path}#{options[:query]}"
+            response = get_without_caching(path, options)
+            cache.set(key, response) if response.code == 200
+            return response
+          end
+        end
+        # Redefine original HTTParty +get+ method to use cache
+        #
+        def self.get(path, options={})
+          self.get_with_caching(path, options={})
+        end
+      end
+    end
+    # === Cache container
+    #
+    # Pass a store name ('memory', etc) to new
+    #
+    class Cache
+      attr_accessor :store
+      def initialize(store, options={})
+        self.class.logger = options[:logger]
+        @store = self.class.lookup_store(store).new(options)
+      end
+      def get(key);            @store.get encode(key) unless stale?(key);        end
+      def set(key, value);     @store.set encode(key), value;                    end
+      def exists?(key);        @store.exists? encode(key);                       end
+      def stale?(key);         @store.stale?  encode(key);                       end
+      def self.logger; @logger || default_logger; end
+      def self.default_logger; logger = ::Logger.new(STDERR); end
+      # Pass a filename (String), IO object, Logger instance or +nil+ to silence the logger
+      def self.logger=(device); @logger = device.kind_of?(::Logger) ? device : ::Logger.new(device); end
+      private
+      # Return store class based on passed name
+      def self.lookup_store(name)
+        store_name = "#{name.capitalize}Store"
+        return Store::const_get(store_name)
+      rescue NameError => e
+        raise Store::StoreNotFound, "The cache store '#{store_name}' was not found. Did you loaded any such class?"
+      end
+      def encode(key); Digest::MD5.hexdigest(key); end
+    end
+    # === Cache stores
+    #
+    module Store
+      class StoreNotFound < StandardError; end #:nodoc:
+      # ==== Abstract Store
+      # Inherit your store from this class
+      # *IMPORTANT*: Do not forget to call +super+ in your +initialize+ method!
+      #
+      class AbstractStore
+        def initialize(options={})
+          raise ArgumentError, "You need to set the :timeout parameter" unless options[:timeout]
+          @timeout = options[:timeout]
+          message =  "Cache: Using #{self.class.to_s.split('::').last}"
+          message << " in location: #{options[:location]}" if options[:location]
+          message << " with timeout #{options[:timeout]} sec"
+          Cache.logger.info message unless options[:logger].nil?
+          return self
+        end
+        %w{set get exists? stale?}.each do |method_name|
+          define_method(method_name) { raise NoMethodError, "Please implement method #{method_name} in your store class" }
+        end
+      end
+      # ==== Store objects in memory
+      # See HTTParty::Icebox::ClassMethods.cache
+      #
+      class MemoryStore < AbstractStore
+        def initialize(options={})
+          super; @store = {}; self
+        end
+        def set(key, value)
+          Cache.logger.info("Cache: set (#{key})")
+          @store[key] = [Time.now, value]; true
+        end
+        def get(key)
+          data = @store[key][1]
+          Cache.logger.info("Cache: #{data.nil? ? "miss" : "hit"} (#{key})")
+          data
+        end
+        def exists?(key)
+          !@store[key].nil?
+        end
+        def stale?(key)
+          return true unless exists?(key)
+          Time.now - created(key) > @timeout
+        end
+        private
+        def created(key)
+          @store[key][0]
+        end
+      end
+      # ==== Store objects on the filesystem
+      # See HTTParty::Icebox::ClassMethods.cache
+      #
+      class FileStore < AbstractStore
+        def initialize(options={})
+          super
+          options[:location] ||= Dir::tmpdir
+          @path = Pathname.new( options[:location] )
+          FileUtils.mkdir_p( @path )
+          self
+        end
+        def set(key, value)
+          Cache.logger.info("Cache: set (#{key})")
+          File.open( @path.join(key), 'w' ) { |file| file << Marshal.dump(value)  }
+          true
+        end
+        def get(key)
+          data = Marshal.load(File.read( @path.join(key)))
+          Cache.logger.info("Cache: #{data.nil? ? "miss" : "hit"} (#{key})")
+          data
+        end
+        def exists?(key)
+          File.exists?( @path.join(key) )
+        end
+        def stale?(key)
+          return true unless exists?(key)
+          Time.now - created(key) > @timeout
+        end
+        private
+        def created(key)
+          File.mtime( @path.join(key) )
+        end
+      end
+    end
+  end
+end
+# Major parts of this code are based on architecture of ApiCache.
+# Copyright (c) 2008 Martyn Loughran
+#
+# Other parts are inspired by the ActiveSupport::Cache in Ruby On Rails.
+# Copyright (c) 2005-2009 David Heinemeier Hansson
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

data/lib/webtagger.rb ADDED

@@ -0,0 +1,133 @@
+require 'fileutils'
+require 'httparty'
+require 'httparty_icebox'
+#Module for extracting keywords from text. Uses the tagthe, yahoo and alchemyAPI web services.
+#Because the yahoo and alchemy services require an API key, a command line utility is provided
+#to add those tokens for subsequent uses of the modules, storing them in <tt>~/.webtagger</tt>
+#it uses caching to avoid being throttled by the apis, via the httparty_icebox gem
+module WebTagger
+    #The services supported by this version
+    SERVICES = ['yahoo', 'alchemy', 'tagthe']
+    #A generic exception to handle api call errors
+    class WebTaggerError < RuntimeError
+        attr :response
+        def initialize(resp)
+            @response = resp
+        end
+    end
+    #Get the persisted token for a service, if no service is provided, all tokens are returned in a hash
+    #Params:
+    #+service+:: the service for which the token should be retrieved, must be one of SERVICES
+    def get_token(service="")
+        service = service.strip.downcase
+        conf = File.join(ENV['HOME'], '.webtagger')
+        return nil unless File.exist? conf
+        srvcs = {}
+        File.open(conf).each do |service_conf|
+            s, t = service_conf.split(/\s*=\s*/) rescue next
+            srvcs[s.strip.downcase] = t.strip
+        end
+        return case
+        when service == "all"
+            srvcs
+        when (SERVICES.include?(service) and srvcs[service])
+            srvcs[service]
+        else
+            nil
+        end
+    end
+    #Class to access the
+    #{yahoo term extraction web service}[http://developer.yahoo.com/search/content/V1/termExtraction.html]
+    class Yahoo
+        include HTTParty
+        include HTTParty::Icebox
+        format :json
+        base_uri "http://search.yahooapis.com/ContentAnalysisService/V1"
+        cache :store => 'memory', :timeout => 1
+        def self.tag(text, token)
+            raise "Token missing!" unless token
+            resp = post("/termExtraction", :query => {:appid => token, :context => text, :output=>'json'} )
+            if resp.has_key?('ResultSet')
+                return resp['ResultSet']['Result'] || []
+            else
+                raise WebTaggerError.new(resp), "Error in API call"
+            end
+        end
+    end
+    #Class for accessing the
+    #{alchemy keyword extraction service}[http://www.alchemyapi.com/api/keyword/textc.html]
+    class Alchemy
+        include HTTParty
+        include HTTParty::Icebox
+        format :json
+        base_uri "http://access.alchemyapi.com/calls/text"
+        cache :store => 'memory', :timeout => 1
+        def self.tag(text, token)
+            raise "Token missing!" unless token
+            resp = post("/TextGetRankedKeywords", :query => {:apikey => token, :text => text, :outputMode=>'json'} )
+            if resp['status'] != 'ERROR'
+                #it's a hash array of [{:text=>"", :relevance=>""}]
+                kws = []
+                resp['keywords'].each do |m|
+                    kws.push m["text"]
+                end
+                return kws
+            else
+                raise WebTaggerError.new(resp), "Error in API call"
+            end
+        end
+    end
+    #class for accesing the
+    #{tagthe API}[http://tagthe.net/fordevelopers]
+    class Tagthe
+        include HTTParty
+        include HTTParty::Icebox
+        format :json
+        base_uri "http://tagthe.net/api"
+        cache :store => 'memory', :timeout => 1
+        def self.tag(text)
+            resp = post("/", :query => {:text => text, :view=>'json'} )
+            if resp.has_key?('memes') and resp['memes'][0].has_key?('dimensions') \
+                and resp['memes'][0]['dimensions'].has_key?('topic')
+                return resp['memes'][0]['dimensions']['topic']
+            else
+                return []
+            end
+        end
+    end
+    #Method for obtaining keywords in a text
+    #Params:
+    #+text+:: a +String+, the text to tag
+    #+service+(optional):: a +String+, the name of the service to use, defaults to tagthe and must be one of SERVICES
+    #+token+(optional):: a token to use for calling the service (tagthe doesn't need one), keep in mind that this value,
+    #superseeds the one stored in +~/.webtagger+ and that, due to caching, might not be used if the request is done
+    #less than a minute after the last one with a different token
+    def tag(text,service="tagthe",token=nil)
+        service = service.strip.downcase
+        token = get_token(service) unless token
+        return case
+            when service == "yahoo"
+                Yahoo.tag(text, token)
+            when service == "alchemy"
+                Alchemy.tag(text, token)
+            else
+                Tagthe.tag(text)
+        end
+    end
+    module_function :tag
+    module_function :get_token
+end #of webtagger module

data/test/helper.rb ADDED

@@ -0,0 +1,10 @@
+require 'rubygems'
+require 'test/unit'
+require 'shoulda'
+$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
+$LOAD_PATH.unshift(File.dirname(__FILE__))
+require 'webtagger'
+# Reopens the base test case class; nothing is added to it at the moment.
+class Test::Unit::TestCase
+end

data/test/test_webtagger.rb ADDED

@@ -0,0 +1,7 @@
+require 'helper'
+# Placeholder test case: its single test always fails through +flunk+,
+# as a reminder that real tests have not been written yet.
+class TestWebtagger < Test::Unit::TestCase
+  should "probably rename this file and start testing for real" do
+    flunk "hey buddy, you should probably rename this file and start testing for real"
+  end
+end

metadata ADDED

@@ -0,0 +1,109 @@
+--- !ruby/object:Gem::Specification
+name: webtagger
+version: !ruby/object:Gem::Version
+  hash: 27
+  prerelease: false
+  segments:
+  - 0
+  - 1
+  - 0
+  version: 0.1.0
+platform: ruby
+authors:
+- lfborjas
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2010-08-28 00:00:00 -06:00
+default_executable:
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: thoughtbot-shoulda
+  prerelease: false
+  requirement: &id001 !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        hash: 3
+        segments:
+        - 0
+        version: "0"
+  type: :development
+  version_requirements: *id001
+- !ruby/object:Gem::Dependency
+  name: httparty
+  prerelease: false
+  requirement: &id002 !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - "="
+      - !ruby/object:Gem::Version
+        hash: 5
+        segments:
+        - 0
+        - 6
+        - 1
+        version: 0.6.1
+  type: :runtime
+  version_requirements: *id002
+description: Use webtagger to hace easy access to keyword extraction web services (tagthe.net, yahoo and alchemy)
+email: me@lfborjas.com
+executables:
+- webtagger
+- webtagger
+extensions: []
+extra_rdoc_files:
+- LICENSE
+- README.rdoc
+files:
+- .document
+- .gitignore
+- LICENSE
+- README.rdoc
+- Rakefile
+- VERSION
+- bin/webtagger
+- lib/httparty_icebox.rb
+- lib/webtagger.rb
+- test/helper.rb
+- test/test_webtagger.rb
+has_rdoc: true
+homepage: http://github.com/lfborjas/webtagger
+licenses: []
+post_install_message:
+rdoc_options:
+- --charset=UTF-8
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      hash: 3
+      segments:
+      - 0
+      version: "0"
+required_rubygems_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      hash: 3
+      segments:
+      - 0
+      version: "0"
+requirements: []
+rubyforge_project:
+rubygems_version: 1.3.7
+signing_key:
+specification_version: 3
+summary: Use some popular web services to extract keywords from text
+test_files:
+- test/helper.rb
+- test/test_webtagger.rb