RubyGems - scrapbot - Versions diffs - 0.0.1 - Mend

scrapbot 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

data/.gitignore ADDED

@@ -0,0 +1,5 @@
+*.gem
+.bundle
+Gemfile.lock
+pkg/*
+nbproject/

data/Gemfile ADDED

@@ -0,0 +1,4 @@
+# Fetch gems over HTTPS so packages and the index cannot be altered in transit.
+source "https://rubygems.org"
+# Specify your gem's dependencies in scrapbot.gemspec
+gemspec

data/README.rdoc ADDED

@@ -0,0 +1,15 @@
+= Scrapbot
+== Introduction
+    A bot that watches the given URLs and copies them to your computer when it detects any changes
+== Installation
+    gem install scrapbot
+== Usage
+    An example configuration file is provided in the example folder
+    scrapbot watch --url_file=urls.txt --conf_file=conf.yml

data/Rakefile ADDED

	@@ -0,0 +1,2 @@
1	+ require 'bundler' # loads Bundler so that Bundler::GemHelper is available below
2	+ Bundler::GemHelper.install_tasks # adds Bundler's gem packaging Rake tasks to this Rakefile

data/bin/scrapbot ADDED

@@ -0,0 +1,52 @@
+#!/usr/bin/env ruby
+require File.expand_path('../../lib/scrapbot', __FILE__)
+require 'thor'
+require 'settingslogic'
+require 'configliere'
+module Scrapbot
+  Settings.use :commandline, :config_file
+  class Init < Thor
+    attr_accessor :urls
+    desc "server", "start watching urls"
+    #    Url file
+    method_options :url_file => :string, :required => true
+    method_options :conf_file => :string, :required => true
+    def watch
+      load_settings(File.join(Dir.getwd,"/#{options[:conf_file]}"))
+      load_urls(File.join(Dir.getwd,"/#{options[:url_file]}"))
+      s = Scrapbot::Downloader.new(self.urls)
+      s.start
+    end
+    private
+    def load_settings(path)
+      Settings.read path
+      Settings.resolve!
+    end
+    def load_urls(path)
+      #read and parse by line txt file
+      self.urls = File.open(path).readlines.map(&:strip).reject(&:empty?)
+    end
+  end
+end
+Scrapbot::Init.start
+# scrapbot watch --url_file="www.wp.pl"

data/example/conf.yml ADDED

	@@ -0,0 +1 @@
1	+ :path_to_git : /home/lewy/programowanie/projekty/downloads/

data/example/urls.txt ADDED

	@@ -0,0 +1,2 @@
1	+ www.google.com
2	+ www.msn.com

data/lib/scrapbot.rb ADDED

@@ -0,0 +1,7 @@
+require 'scrapbot/downloader'
+require 'typhoeus'
+module Scrapbot
+  PATH = '/home/lewy/programowanie/projekty/downloads'
+end

data/lib/scrapbot/downloader.rb ADDED

@@ -0,0 +1,68 @@
+require_relative 'storage'
+require 'settingslogic'
+module Scrapbot
+  #  This class is responsible for downloading page
+  class Downloader
+    attr_accessor :urls
+    def initialize(urls)
+      self.urls = urls
+    end
+    def start
+      s = Storage.new
+      urls_list do |response|
+        s.magazine << response
+      end
+      s.save_all
+    end
+    private
+    def urls_list(&block)
+      self.urls.each do |url|
+        yield Typhoeus::Request.get(url)
+      end
+    end
+  end
+  #  module Downloader
+  #    def archive(urls = [],options = {})
+  #      #      This method download given urls
+  #      urls.each do |url|
+  #        response = Typhoeus::Request.get(url)
+  #        Storage.new(response).save
+  #      end
+  #    end
+  #
+  #
+  #    # Create or switch to branch named response.effective_url
+  #    # Grit::Repo(path_to_repo)
+  #
+  #
+  #    class Storage
+  #      attr_accessor :response
+  #
+  #      def initialize(typh_url)
+  #        self.response = typh_url
+  #      end
+  #
+  #      def save
+  #
+  #      end
+  #
+  #      private
+  #
+  #      def find_or_create_branch
+  #
+  #      end
+  #    end
+  #  end
+end

data/lib/scrapbot/storage.rb ADDED

@@ -0,0 +1,28 @@
+require 'grit'
+module Scrapbot
+  class Storage
+    attr_accessor :magazine
+    def initialize
+      self.magazine = []
+    end
+    def save_all
+      self.magazine.each do |response|
+        File.open(save_path,'wb') do |f|
+          f.write(response.body)
+        end
+      end
+    end
+    private
+    def save_path
+      File.join(Settings[:path_to_git],"index.html")
+    end
+  end
+end

data/lib/scrapbot/version.rb ADDED

@@ -0,0 +1,3 @@
+module Scrapbot
+  # Gem version string; scrapbot.gemspec reads it as Scrapbot::VERSION.
+  VERSION = "0.0.1"
+end

data/scrapbot.gemspec ADDED

@@ -0,0 +1,27 @@
+# -*- encoding: utf-8 -*-
+$:.push File.expand_path("../lib", __FILE__)
+require "scrapbot/version"
+Gem::Specification.new do |s|
+  s.name        = "scrapbot"
+  s.version     = Scrapbot::VERSION
+  s.platform    = Gem::Platform::RUBY
+  s.authors     = ["Pawel Lewinski"]
+  s.email       = ["lewy313@gmail.com"]
+  s.homepage    = ""
+  s.summary     = %q{Scrapping Bot}
+  s.description = %q{Bot for archive pages}
+  s.add_development_dependency "grit"
+  s.add_development_dependency "typhoeus", ">=0.2"
+  s.add_development_dependency "configliere", ">=0.3"
+  s.executables = ["scrapbot"]
+  s.rubyforge_project = "scrapbot"
+  s.files         = `git ls-files`.split("\n")
+  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
+  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
+  s.require_paths = ["lib"]
+end

metadata ADDED

@@ -0,0 +1,116 @@
+--- !ruby/object:Gem::Specification
+name: scrapbot
+version: !ruby/object:Gem::Version
+  prerelease: false
+  segments:
+  - 0
+  - 0
+  - 1
+  version: 0.0.1
+platform: ruby
+authors:
+- Pawel Lewinski
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2011-03-17 00:00:00 +01:00
+default_executable:
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: grit
+  prerelease: false
+  requirement: &id001 !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        segments:
+        - 0
+        version: "0"
+  type: :development
+  version_requirements: *id001
+- !ruby/object:Gem::Dependency
+  name: typhoeus
+  prerelease: false
+  requirement: &id002 !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        segments:
+        - 0
+        - 2
+        version: "0.2"
+  type: :development
+  version_requirements: *id002
+- !ruby/object:Gem::Dependency
+  name: configliere
+  prerelease: false
+  requirement: &id003 !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        segments:
+        - 0
+        - 3
+        version: "0.3"
+  type: :development
+  version_requirements: *id003
+description: Bot for archive pages
+email:
+- lewy313@gmail.com
+executables:
+- scrapbot
+extensions: []
+extra_rdoc_files: []
+files:
+- .gitignore
+- Gemfile
+- README.rdoc
+- Rakefile
+- bin/scrapbot
+- example/conf.yml
+- example/urls.txt
+- lib/scrapbot.rb
+- lib/scrapbot/downloader.rb
+- lib/scrapbot/storage.rb
+- lib/scrapbot/version.rb
+- scrapbot.gemspec
+has_rdoc: true
+homepage: ""
+licenses: []
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      segments:
+      - 0
+      version: "0"
+required_rubygems_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      segments:
+      - 0
+      version: "0"
+requirements: []
+rubyforge_project: scrapbot
+rubygems_version: 1.3.7
+signing_key:
+specification_version: 3
+summary: Scrapping Bot
+test_files: []