jamnagar 1.3.8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (83) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +17 -0
  3. data/.rspec +3 -0
  4. data/.vagrant/machines/default/virtualbox/action_provision +1 -0
  5. data/.vagrant/machines/default/virtualbox/action_set_name +1 -0
  6. data/.vagrant/machines/default/virtualbox/id +1 -0
  7. data/.vagrant/machines/default/virtualbox/index_uuid +1 -0
  8. data/.vagrant/machines/default/virtualbox/private_key +27 -0
  9. data/.vagrant/machines/default/virtualbox/synced_folders +1 -0
  10. data/Gemfile +4 -0
  11. data/Gemfile.lock +64 -0
  12. data/LICENSE.txt +22 -0
  13. data/README.md +31 -0
  14. data/Rakefile +2 -0
  15. data/UTF-8-test.txt +0 -0
  16. data/Vagrantfile +67 -0
  17. data/blinky_tests +3 -0
  18. data/bootstrap.sh +36 -0
  19. data/jamnagar.gemspec +33 -0
  20. data/lib/jamnagar/adapters/adapter.rb +18 -0
  21. data/lib/jamnagar/adapters/file_system_adapter.rb +9 -0
  22. data/lib/jamnagar/adapters/mongo_adapter.rb +54 -0
  23. data/lib/jamnagar/adapters/persistent_store_adapter.rb +9 -0
  24. data/lib/jamnagar/initializers/mongo.rb +6 -0
  25. data/lib/jamnagar/materials/item.rb +15 -0
  26. data/lib/jamnagar/materials/ore.rb +29 -0
  27. data/lib/jamnagar/producers/producer.rb +5 -0
  28. data/lib/jamnagar/producers/rss_producer.rb +6 -0
  29. data/lib/jamnagar/producers/twitter_producer.rb +6 -0
  30. data/lib/jamnagar/refineries/content_refinery.rb +46 -0
  31. data/lib/jamnagar/refiners/contributor_detail.rb +17 -0
  32. data/lib/jamnagar/refiners/duplicate_detection.rb +23 -0
  33. data/lib/jamnagar/refiners/meta_data_extraction.rb +15 -0
  34. data/lib/jamnagar/refiners/popularity_incrementation.rb +22 -0
  35. data/lib/jamnagar/refiners/primary_key_generation.rb +20 -0
  36. data/lib/jamnagar/refiners/refiner.rb +22 -0
  37. data/lib/jamnagar/refiners/source_detail.rb +17 -0
  38. data/lib/jamnagar/refiners/url_expansion.rb +23 -0
  39. data/lib/jamnagar/refiners/utm_stripping.rb +13 -0
  40. data/lib/jamnagar/storage/basic_store.rb +34 -0
  41. data/lib/jamnagar/storage/contributor_store.rb +35 -0
  42. data/lib/jamnagar/storage/in_memory_cache.rb +17 -0
  43. data/lib/jamnagar/storage/item_store.rb +21 -0
  44. data/lib/jamnagar/storage/refined_item_store.rb +17 -0
  45. data/lib/jamnagar/storage/source_store.rb +35 -0
  46. data/lib/jamnagar/utilities/duplicate_detector.rb +12 -0
  47. data/lib/jamnagar/utilities/meta_data_extractor.rb +16 -0
  48. data/lib/jamnagar/utilities/popularity_incrementor.rb +20 -0
  49. data/lib/jamnagar/utilities/runner.rb +12 -0
  50. data/lib/jamnagar/utilities/silent_logger.rb +17 -0
  51. data/lib/jamnagar/utilities/url_expander.rb +47 -0
  52. data/lib/jamnagar/utilities/utm_stripper.rb +21 -0
  53. data/lib/jamnagar/verifiers/uniqueness_verifier.rb +16 -0
  54. data/lib/jamnagar/verifiers/verifier.rb +13 -0
  55. data/lib/jamnagar/version.rb +3 -0
  56. data/lib/jamnagar.rb +7 -0
  57. data/run.rb +49 -0
  58. data/sentinal +10 -0
  59. data/spec/basic_store_spec.rb +53 -0
  60. data/spec/content_refinement_spec.rb +74 -0
  61. data/spec/contributor_detail_refinment_spec.rb +26 -0
  62. data/spec/contributor_store_spec.rb +31 -0
  63. data/spec/duplicate_detector_spec.rb +26 -0
  64. data/spec/helpers.rb +92 -0
  65. data/spec/item_spec.rb +9 -0
  66. data/spec/item_store_spec.rb +18 -0
  67. data/spec/mongo_adapter_spec.rb +18 -0
  68. data/spec/popularity_incrementor_spec.rb +23 -0
  69. data/spec/producers_spec.rb +9 -0
  70. data/spec/refined_item_store_spec.rb +29 -0
  71. data/spec/refinements_spec.rb +118 -0
  72. data/spec/runner_spec.rb +8 -0
  73. data/spec/scenarios_spec.rb +4 -0
  74. data/spec/source_detail_refinment_spec.rb +24 -0
  75. data/spec/source_store_spec.rb +31 -0
  76. data/spec/spec_helper.rb +98 -0
  77. data/spec/url_expander_spec.rb +46 -0
  78. data/spec/utm_stripper_spec.rb +31 -0
  79. data/spec/utm_stripping_spec.rb +5 -0
  80. data/spec/verifications_spec.rb +22 -0
  81. data/tracer.rb +61 -0
  82. data/tweet_stream.json +1 -0
  83. metadata +288 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: bd80d453e64fa92696458a499a24b41c4f23b034
4
+ data.tar.gz: eb46c5b99a58c17b36ccdeac7a267fa5ecc714bc
5
+ SHA512:
6
+ metadata.gz: d5c18347d0c3fb399e735e94c6295d80acfa0487c189e755a67d71ecbb4fd830c8dca21fde7a292d66a465612609f246d2d802f7e2d8da8f3de1eefa7df3ee41
7
+ data.tar.gz: 07efa6b5e57d45f0b274b7c920426c0eb7b7c383449a458423cc7a4b159d83843c573aa3589610bbca7105671bf9d3f33b42e2cdab240578964fc3f7192a742c
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ .vagrant
3
+ coverage
4
+ /.bundle/
5
+ /.yardoc
6
+ /Gemfile.lock
7
+ /_yardoc/
8
+ /coverage/
9
+ /doc/
10
+ /pkg/
11
+ /spec/reports/
12
+ /tmp/
13
+ *.bundle
14
+ *.so
15
+ *.o
16
+ *.a
17
+ mkmf.log
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --color
2
+ --require spec_helper
3
+ -f p
@@ -0,0 +1 @@
1
+ 1.5:56a91893-8c48-4bb0-bbe2-bb5c6a71458e
@@ -0,0 +1 @@
1
+ 1422936767
@@ -0,0 +1 @@
1
+ 56a91893-8c48-4bb0-bbe2-bb5c6a71458e
@@ -0,0 +1 @@
1
+ cd630fe31aa041bc9a38f3df8739760c
@@ -0,0 +1,27 @@
1
+ -----BEGIN RSA PRIVATE KEY-----
2
+ MIIEpAIBAAKCAQEAxYZos0iMqoP7EUagdjF9RKZCxxQcIQul5IpJ9r47icgMbnux
3
+ 5cPK4l44JadfO3YUWFvGQ1LOGVYGvL/F0E+m1JSWAjk1+9lLjdAwIMYRgrCmGRXp
4
+ 8CWN9Ph/EMz60iLPfWYb1IflCfb4lBJVQIN3RL/Pif0mqhMg3wIou/kGHrAUHh5y
5
+ cpqMk4RCTvyLb8BcijTDgvK4GTV7AFDttIHAfSPUmVSBn5zhw4mpQqtPCeGA/aRP
6
+ qCKgpwXCzzXqY8vi24M2E2x3kM8GvLIxISSK+n+CuLcw2zVoDWmN2HnLB5parOWF
7
+ Bsc+Xf+fXFeQkPN3e3rnUQLrUNqHNsYamKaGpQIDAQABAoIBAF86icfFtmuO7cBK
8
+ eJoDCg3Kym8INveHkhc4xKf81t96XuP1JrNLTckM+6zLoJIeP3jJ5jAW/94fJ2hg
9
+ XJItlJTwfdHcmzYscMySDmH+m/qSB8IYWoy4zlPIahpDeLlOpxIh5FD2o2nGc9mP
10
+ bSKJPzsZ7ojsT3O3fSWG+PSG/nhUOdHG9luiuHToe9howKsvKkf4S11S9XXPx6gI
11
+ LIkdphcbykqMHhCX3rSNu9pblc0WunuQ7f4kHSQtmWfbxnzFPTraNk3+uban793u
12
+ ypBjN/el7/RtNHs0psoccv4YeKGGZYtYFh2Mq9w3j+1lA+rCRtWI2/jLMr+KZz1W
13
+ XXaEvwECgYEA9zbOO6Hei9l5o8UeZlWVGugSQbC+WIMNNSzpjFHK6UNRe7dvYZa0
14
+ /oZEir1EIbJDMw9Rn723zjVUuVDyykAur0fL6v+0OWuLnKMh7Icokv/41ZOtQCtD
15
+ zf4XPu+vEYt0LTYuiw9e8QB2U0cc7gDaVxh31+FNGjlBj1yspG2+qZECgYEAzIuG
16
+ NOoiakEEnhPcc2+JOyiUXav5sLNQxbbBaJHqq17AvNiCXpQm/nWojBiDRkq3GdSx
17
+ Lf6wN0RDnGRj30ixPx0q88Pa7ETRBaYRcmb9fTCBGu2DNTq165yAg8fNv7lH27a1
18
+ qxG7xtwyLiEM/gX8I8qPzRTXdhCnE3d3IkDL4dUCgYEAlIgJQSrwW3Y+73bv3Oxu
19
+ ucvVrEJVGzkiJmDlsbkzARPBUWPVN/0koghqBjHRTa2dUoBRunhyhyLj1LQeLAaE
20
+ BixNCNS5pAZJy8L4DvTmG+xPxYMFBRj5lDqmHNpFhXUUSFXVgRerI4HzlfWUHCvH
21
+ A61f1AU6HhdonIpB8Ek/1QECgYArB7MdEKBa0ADDrJP06OLqwhesORCXHrIAVfkM
22
+ IZ5HnPBZCgcrFGm7oE7r4gBD2lX4pij77xdtGPxMO0ZTY7x99YnirDIzACsk0BHd
23
+ ilMLchfG7W5r1UUOIudaUb8z9Stryl0fduSU6h/YWnWcPvATK3ri/t5w5QHw83Hp
24
+ j84UXQKBgQD1rsAR0KqYwquC+LE88pmb+aMOeHp596SseJhZIvH5uzcJVkCapdGY
25
+ 6ow2v7ctNIVsZnollanX/faaIlP1urAh335az3jykUZ935+gZyUytojID+fGWAXK
26
+ eDCFZ2pkqwGiYNdqbMY0JeKk2sGoSiNRuDfvvmbNxolknwSzjIZ4CQ==
27
+ -----END RSA PRIVATE KEY-----
@@ -0,0 +1 @@
1
+ {"virtualbox":{"/vagrant":{"guestpath":"/vagrant","hostpath":"/Users/clayton/Projects/jamnagar","disabled":false}}}
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in jamnagar.gemspec
4
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,64 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ jamnagar (1.3.7)
5
+ httparty (~> 0.13)
6
+ moped (~> 2.0)
7
+ multi_json (~> 1.10)
8
+ nokogiri (~> 1.6)
9
+ require_all (~> 1.3)
10
+
11
+ GEM
12
+ remote: https://rubygems.org/
13
+ specs:
14
+ blinky-tape-test-status (1.1.3)
15
+ serialport
16
+ bson (2.3.0)
17
+ connection_pool (2.1.2)
18
+ diff-lcs (1.2.5)
19
+ docile (1.1.5)
20
+ httparty (0.13.3)
21
+ json (~> 1.8)
22
+ multi_xml (>= 0.5.2)
23
+ json (1.8.2)
24
+ mini_portile (0.6.2)
25
+ moped (2.0.4)
26
+ bson (~> 2.2)
27
+ connection_pool (~> 2.0)
28
+ optionable (~> 0.2.0)
29
+ multi_json (1.10.1)
30
+ multi_xml (0.5.5)
31
+ nokogiri (1.6.6.2)
32
+ mini_portile (~> 0.6.0)
33
+ optionable (0.2.0)
34
+ rake (10.4.2)
35
+ require_all (1.3.2)
36
+ rspec (3.1.0)
37
+ rspec-core (~> 3.1.0)
38
+ rspec-expectations (~> 3.1.0)
39
+ rspec-mocks (~> 3.1.0)
40
+ rspec-core (3.1.7)
41
+ rspec-support (~> 3.1.0)
42
+ rspec-expectations (3.1.2)
43
+ diff-lcs (>= 1.2.0, < 2.0)
44
+ rspec-support (~> 3.1.0)
45
+ rspec-mocks (3.1.3)
46
+ rspec-support (~> 3.1.0)
47
+ rspec-support (3.1.2)
48
+ serialport (1.3.1)
49
+ simplecov (0.9.1)
50
+ docile (~> 1.1.0)
51
+ multi_json (~> 1.0)
52
+ simplecov-html (~> 0.8.0)
53
+ simplecov-html (0.8.0)
54
+
55
+ PLATFORMS
56
+ ruby
57
+
58
+ DEPENDENCIES
59
+ blinky-tape-test-status (~> 1.1)
60
+ bundler (~> 1.7)
61
+ jamnagar!
62
+ rake (~> 10.0)
63
+ rspec (~> 3.1)
64
+ simplecov (~> 0.9)
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2015 Clayton Lengel-Zigich
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,31 @@
1
+ # Jamnagar
2
+
3
+ TODO: Write a gem description
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'jamnagar'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install jamnagar
20
+
21
+ ## Usage
22
+
23
+ TODO: Write usage instructions here
24
+
25
+ ## Contributing
26
+
27
+ 1. Fork it ( https://github.com/[my-github-username]/jamnagar/fork )
28
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
29
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
30
+ 4. Push to the branch (`git push origin my-new-feature`)
31
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+
data/UTF-8-test.txt ADDED
Binary file
data/Vagrantfile ADDED
@@ -0,0 +1,67 @@
# -*- mode: ruby -*-
# vi: set ft=ruby :

# All Vagrant configuration is done below. The "2" in Vagrant.configure
# configures the configuration version (we support older styles for
# backwards compatibility). Please don't change it unless you know what
# you're doing.
Vagrant.configure(2) do |config|
  # The most common configuration options are documented and commented below.
  # For a complete reference, please see the online documentation at
  # https://docs.vagrantup.com.

  # Every Vagrant development environment requires a box. You can search for
  # boxes at https://atlas.hashicorp.com/search.
  config.vm.box = "ubuntu/trusty64"

  # Disable automatic box update checking. If you disable this, then
  # boxes will only be checked for updates when the user runs
  # `vagrant box outdated`. This is not recommended.
  # config.vm.box_check_update = false

  # Create a forwarded port mapping which allows access to a specific port
  # within the machine from a port on the host machine. In the example below,
  # accessing "localhost:8080" will access port 80 on the guest machine.
  # config.vm.network "forwarded_port", guest: 80, host: 8080

  # Create a private network, which allows host-only access to the machine
  # using a specific IP.
  # config.vm.network "private_network", ip: "192.168.33.10"

  # Create a public network, which is generally matched to a bridged network.
  # Bridged networks make the machine appear as another physical device on
  # your network.
  # config.vm.network "public_network"

  # Share an additional folder to the guest VM. The first argument is
  # the path on the host to the actual folder. The second argument is
  # the path on the guest to mount the folder. And the optional third
  # argument is a set of non-required options.
  # config.vm.synced_folder "../data", "/vagrant_data"

  # Provider-specific configuration so you can fine-tune various
  # backing providers for Vagrant. These expose provider-specific options.
  # Example for VirtualBox:
  #
  config.vm.provider "virtualbox" do |vb|
    # Customize the amount of memory on the VM:
    vb.memory = "2048"
  end
  #
  # View the documentation for the provider you are using for more
  # information on available options.

  # Define a Vagrant Push strategy for pushing to Atlas. Other push strategies
  # such as FTP and Heroku are also available. See the documentation at
  # https://docs.vagrantup.com/v2/push/atlas.html for more information.
  # config.push.define "atlas" do |push|
  #   push.app = "YOUR_ATLAS_USERNAME/YOUR_APPLICATION_NAME"
  # end

  # Enable provisioning with a shell script. Additional provisioners such as
  # Puppet, Chef, Ansible, Salt, and Docker are also available. Please see the
  # documentation for more information about their specific syntax and use.
  config.vm.provision :shell, path: "bootstrap.sh"
end
66
+
67
+
data/blinky_tests ADDED
@@ -0,0 +1,3 @@
#!/bin/bash

# Runs the spec suite and reports the outcome through blinky-tape-test-tool:
# `f` before the run, `gs` when the specs pass, `rs` when the start signal or
# the specs fail.
#
# An explicit if/else is used instead of `a && b && c || d` so that a failure
# of the `gs` command itself does not also trigger the `rs` signal.
if blinky-tape-test-tool f && bundle exec rspec spec; then
  blinky-tape-test-tool gs
else
  blinky-tape-test-tool rs
fi
data/bootstrap.sh ADDED
@@ -0,0 +1,36 @@
#!/usr/bin/env bash
# Provisions the development VM: system packages, Ruby 2.1.5 built from
# source, RubyGems/Bundler, Git defaults, Node.js and Passenger for Apache.

# `sudo su` inside a script only opens a nested root shell; the commands that
# follow it keep running as the original user. Re-run the whole script as
# root instead so every step below is actually privileged.
if [ "$(id -u)" -ne 0 ]; then
  exec sudo -H bash "$0" "$@"
fi

# Provisioning runs unattended, so package tools must never wait for input.
export DEBIAN_FRONTEND=noninteractive

apt-get update
apt-get upgrade -y
apt-get install -y git-core curl zlib1g-dev build-essential libssl-dev libreadline-dev libyaml-dev libsqlite3-dev sqlite3 libxml2-dev libxslt1-dev libcurl4-openssl-dev python-software-properties apache2-mpm-worker apache2-threaded-dev libapr1-dev libaprutil1-dev

# Ruby
cd ~
wget http://ftp.ruby-lang.org/pub/ruby/2.1/ruby-2.1.5.tar.gz
tar -xzvf ruby-2.1.5.tar.gz
cd ruby-2.1.5/
./configure
make
make install

# Gems
echo "gem: --no-ri --no-rdoc" > ~/.gemrc
gem update --system
gem install bundler

# Git
git config --global color.ui true
git config --global user.name "Vagrant"
git config --global user.email "vagrant@localhost"
# Pass the key path and an empty passphrase explicitly so ssh-keygen does not
# prompt, and skip generation when a key already exists (re-provisioning).
if [ ! -f ~/.ssh/id_rsa ]; then
  mkdir -p ~/.ssh
  ssh-keygen -t rsa -C "vagrant@localhost" -N "" -f ~/.ssh/id_rsa
fi

# Node
add-apt-repository -y ppa:chris-lea/node.js
apt-get update
apt-get install -y nodejs

# Apache
gem install passenger --no-ri --no-rdoc

passenger-install-apache2-module -a
data/jamnagar.gemspec ADDED
@@ -0,0 +1,33 @@
# coding: utf-8
# Gem specification for jamnagar: package metadata, packaged files and
# development/runtime dependencies.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'jamnagar/version'

Gem::Specification.new do |spec|
  spec.name          = "jamnagar"
  spec.version       = Jamnagar::VERSION
  spec.authors       = ["Clayton Lengel-Zigich"]
  spec.email         = ["clayton@claytonlz.com"]
  spec.summary       = %q{An extensible content refinery.}
  spec.description   = %q{Jamnagar is a library for processing content using a petroleum refinement metaphor.}
  spec.homepage      = "https://github.com/clayton/jamnagar"
  spec.license       = "MIT"

  # The packaged file list is whatever git currently tracks.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.7"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec", "~> 3.1"
  spec.add_development_dependency "blinky-tape-test-status", "~> 1.1"
  spec.add_development_dependency "simplecov", "~> 0.9"

  spec.add_runtime_dependency 'moped', "~> 2.0"
  spec.add_runtime_dependency 'require_all', "~> 1.3"
  spec.add_runtime_dependency 'multi_json', "~> 1.10"
  spec.add_runtime_dependency 'httparty', "~> 0.13"
  spec.add_runtime_dependency 'nokogiri', "~> 1.6"
end
@@ -0,0 +1,18 @@
module Jamnagar
  module Adapters
    # Error type for failed inserts into a backing store.
    class InsertError < StandardError; end

    # Base adapter with no-op implementations of the storage interface.
    class Adapter
      # @param args [Object, nil] accepted but ignored by the base adapter
      def initialize(args = nil)
      end

      # Pretends to store +value+ under +key+; nothing is persisted.
      #
      # @param key [Object] identifier of the record
      # @param value [Object] ignored
      # @return [Object] +key+, unchanged
      def store(key, value)
        key
      end

      # @param key [Object] ignored
      # @return [String] always an empty string
      def get(key)
        ""
      end
    end
  end
end
@@ -0,0 +1,9 @@
module Jamnagar
  module Adapters
    # Placeholder adapter; it adds no behaviour of its own and inherits
    # +store+ and +get+ from Adapter.
    class FileSystemAdapter < Adapter
      # @param args [Object, nil] accepted but ignored
      def initialize(args = nil)
      end
    end
  end
end
@@ -0,0 +1,54 @@
1
+ require 'moped'
2
+
module Jamnagar
  module Adapters
    # Adapter that reads and writes documents of one MongoDB collection
    # through a Moped session.
    class MongoAdapter < Adapter
      # Lazily created session shared by adapters that are not given a driver.
      # Held in a class-level instance variable rather than a class variable.
      #
      # @return [Moped::Session]
      def self.driver
        @driver ||= Moped::Session.new(["127.0.0.1:27017"])
      end

      # @param driver [Object, nil] session to use; defaults to the shared one
      # @param database [String] database selected on the session
      # @param collection [String, Symbol] collection every operation targets
      def initialize(driver = nil, database = "test", collection = "catchall")
        @driver = driver || Jamnagar::Adapters::MongoAdapter.driver
        # NOTE(review): `use` switches the database on the session object
        # itself, so adapters sharing one session also share this selection.
        @driver.use database
        @collection = collection
      end

      # Inserts +value+ into the collection. +key+ is not used.
      def store(key, value)
        with_collection { |collection| collection.insert(value) }
      end

      # @return [Object, nil] first document whose "_id" equals +key+
      def get(key)
        with_collection { |collection| collection.find("_id" => key).first }
      end

      # @param params [Hash] query selector
      # @return [Object] the query built for +params+
      def find(params = {})
        with_collection { |collection| collection.find(params) }
      end

      # @param params [Hash] query selector
      # @return [Object, nil] first document matching +params+
      def find_first(params = {})
        with_collection { |collection| collection.find(params).limit(1).first }
      end

      # Applies +updates+ to the query built from +query+.
      def update(query = {}, updates = {})
        with_collection { |collection| collection.find(query).update(updates) }
      end

      # Runs the same find-then-update as #update, taking the selector and the
      # changes from params[:query] and params[:update].
      def find_and_modify(params = {})
        update(params[:query], params[:update])
      end

      private

      # Yields the adapter's collection from a session opened with the
      # `safe: { wtimeout: 5 }` options used for every operation.
      def with_collection
        @driver.with(safe: { wtimeout: 5 }) do |session|
          yield session[@collection.to_sym]
        end
      end
    end
  end
end
@@ -0,0 +1,9 @@
module Jamnagar
  module Adapters
    # Stub adapter whose lookup does nothing.
    class PersistentStoreAdapter
      # Accepts any arguments and performs no lookup.
      #
      # @return [nil]
      def find(*args)
      end
    end
  end
end
@@ -0,0 +1,6 @@
module Jamnagar
  module Initializers
    # Empty placeholder class; it defines no behaviour yet.
    class Mongo
    end
  end
end
@@ -0,0 +1,15 @@
1
+ require 'ostruct'
2
+
module Jamnagar
  module Materials
    # An Ore with accessors for the contributor and source details embedded
    # in its attributes.
    class Item < Ore
      # Returns the "user" entry nested under the "raw" attribute.
      #
      # @return [Object, nil] nil when the item has no "raw" attribute
      #   (previously this raised NoMethodError)
      def raw_contributor
        raw = to_h["raw"]
        raw && raw["user"]
      end

      # Builds the source descriptor from the "final_url_host" attribute,
      # falling back to a fixed placeholder host when it is missing.
      #
      # @return [Hash] a single-entry hash keyed by "id"
      def raw_source
        { "id" => to_h['final_url_host'] || "nosource.jamnagar.co" }
      end
    end
  end
end
@@ -0,0 +1,29 @@
require 'json'

module Jamnagar
  module Materials
    # Thin wrapper around a hash of attributes that refinements are merged into.
    class Ore
      # @param args [Hash, nil] initial attributes; nil is treated as empty
      def initialize(args = {})
        @attributes = args || {}
      end

      # Merges +refinement+ into the attributes (refinement values win).
      #
      # @param refinement [Hash, nil] attributes to merge
      # @return [Hash, nil] the merged attributes, or nil when +refinement+
      #   is nil/false and nothing was merged
      def merge_refinement(refinement = {})
        @attributes = @attributes.merge(refinement) if refinement
      end

      # @return [Hash] the attributes as a hash
      def to_h
        @attributes.to_h
      end

      # @param key [Object] attribute name
      # @return [Object, nil] the attribute value
      def [](key)
        to_h[key]
      end

      # @return [Array] attribute names
      def keys
        to_h.keys
      end

      # Serialises the attributes as JSON.
      #
      # Extra arguments are accepted because the JSON generator passes its
      # state object to +to_json+ when an Ore is nested in another structure.
      #
      # @return [String]
      def to_json(*args)
        return JSON.dump(to_h) if args.empty?
        to_h.to_json(*args)
      end
    end
  end
end
@@ -0,0 +1,5 @@
module Jamnagar
  # Namespace for producer classes; this file only opens the module.
  module Producers
  end
end
@@ -0,0 +1,6 @@
module Jamnagar
  module Producers
    # Empty placeholder class; it defines no behaviour yet.
    class RSSProducer
    end
  end
end
@@ -0,0 +1,6 @@
module Jamnagar
  module Producers
    # Empty placeholder class; it defines no behaviour yet.
    class TwitterProducer
    end
  end
end
@@ -0,0 +1,46 @@
module Jamnagar
  module Refineries
    # Runs every item through the configured refiners and verifiers and hands
    # the result to storage.
    class ContentRefinery
      # @param options [Hash] collaborators, all optional:
      #   :items, :refiners and :verifiers default to empty arrays;
      #   :storage, :logger and :runner default to new ItemStore,
      #   SilentLogger and Runner instances.
      def initialize(options = {})
        @items     = options[:items] || []
        @refiners  = options[:refiners] || []
        @verifiers = options[:verifiers] || []
        @storage   = options[:storage] || Storage::ItemStore.new
        @logger    = options[:logger] || Jamnagar::Utilities::SilentLogger.new
        @runner    = options[:runner] || Jamnagar::Utilities::Runner.new
      end

      # Converts, refines, verifies and stores each item inside the runner.
      # An InsertError for one item is logged and the loop moves on to the
      # next item; any other error propagates.
      #
      # @return [Array] the original items collection
      def refine
        @items.each do |item|
          begin
            @runner.run { @storage.insert(verify(enrich(convert(item)))) }
          rescue Jamnagar::Adapters::InsertError
            @logger.error("Insert Error: _id => #{item['_id']}")
          end
        end
      end

      private

      # Feeds the item through each refiner in order, passing along whatever
      # the previous refiner returned.
      def enrich(item)
        @refiners.inject(item) { |current, refiner| refiner.refine(current) }
      end

      # Feeds the item through each verifier in order, passing along whatever
      # the previous verifier returned.
      def verify(item)
        @verifiers.inject(item) { |current, verifier| verifier.verify(current) }
      end

      # Wraps raw input in an Item; an existing Item is returned untouched.
      def convert(item)
        return item if item.is_a?(Jamnagar::Materials::Item)
        Jamnagar::Materials::Item.new(item)
      end
    end
  end
end
@@ -0,0 +1,17 @@
module Jamnagar
  module Refiners
    # Refiner that attaches contributor details looked up in a store.
    class ContributorDetail < Refiner
      # @param store [Object, nil] object responding to
      #   +find_contributor(item, raw_contributor)+
      def initialize(store: nil)
        @store = store
      end

      # @return [String] human-readable refiner name
      def to_s
        "Contributor Detail"
      end

      # @param item [Object] item responding to +raw_contributor+
      # @return [Hash] the store's lookup result under the "contributor" key
      def refinement_result(item)
        { "contributor" => @store.find_contributor(item, item.raw_contributor) }
      end
    end
  end
end
@@ -0,0 +1,23 @@
module Jamnagar
  module Refiners
    # Refiner that flags an item as a duplicate when its final URL is already
    # known to the detector.
    class DuplicateDetection < Refiner
      # @param detector [Object, nil] object responding to +detect(url)+;
      #   defaults to a DuplicateDetector built on +store+
      # @param store [Object, nil] store handed to the default detector
      def initialize(detector = nil, store: nil)
        @detector = detector || Utilities::DuplicateDetector.new(store)
      end

      # @return [String] human-readable refiner name
      def to_s
        "Duplicate Detection"
      end

      # Delegates unchanged to the parent implementation.
      def refine(item)
        super(item)
      end

      # @param item [Object] item exposing "final_url" through +[]+
      # @return [Hash] {"duplicate" => false} when the detector finds nothing,
      #   otherwise {"duplicate" => true, "duplicate_of" => <matched "_id">}
      def refinement_result(item)
        match = @detector.detect(item["final_url"])
        return { "duplicate" => false } unless match
        { "duplicate" => true, "duplicate_of" => match["_id"] }
      end
    end
  end
end
@@ -0,0 +1,15 @@
module Jamnagar
  module Refiners
    module Twitter
      # Refiner whose refinement is whatever the extractor returns for the item.
      class MetaDataExtraction < Refiner
        # @param extractor [Object, nil] object responding to +extract(item)+;
        #   defaults to a new MetaDataExtractor
        def initialize(extractor = nil)
          @extractor = extractor || Jamnagar::Utilities::MetaDataExtractor.new
        end

        # @param item [Object] item passed straight to the extractor
        # @return [Object] the extractor's result
        def refinement_result(item)
          @extractor.extract(item)
        end
      end
    end
  end
end
@@ -0,0 +1,22 @@
module Jamnagar
  module Refiners
    # Refiner that gives every item a popularity of 1 and, for duplicates,
    # asks the incrementor to bump the item they duplicate.
    class PopularityIncrementation < Refiner
      # @param incrementor [Object, nil] object responding to +increment(id)+;
      #   defaults to a PopularityIncrementor built on +store+
      # @param store [Object, nil] store handed to the default incrementor
      def initialize(incrementor = nil, store: nil)
        @incrementor = incrementor || Utilities::PopularityIncrementor.new(store)
      end

      # @return [String] human-readable refiner name
      def to_s
        "Popularity Incrementor"
      end

      # Delegates unchanged to the parent implementation.
      def refine(item)
        super(item)
      end

      # Increments the duplicated item only when "duplicate" is exactly true.
      #
      # @param item [Object] item exposing "duplicate" and "duplicate_of"
      # @return [Hash] always {"popularity" => 1}
      def refinement_result(item)
        @incrementor.increment(item["duplicate_of"]) if item["duplicate"] == true
        { "popularity" => 1 }
      end
    end
  end
end
@@ -0,0 +1,20 @@
1
+ require 'digest'
2
+
module Jamnagar
  module Refiners
    # Refiner that derives the "_id" of an item from a digest of its content.
    class PrimaryKeyGeneration < Refiner
      # @param digester [Object, nil] object responding to +hexdigest(string)+;
      #   defaults to Digest::MD5
      def initialize(digester = nil)
        @digester = digester || Digest::MD5
      end

      # @return [String] human-readable refiner name
      def to_s
        "Primary Key Generation"
      end

      # @param item [Object] item to derive the key from
      # @return [Hash] the hex digest under the "_id" key
      def refinement_result(item)
        { "_id" => @digester.hexdigest(digest_source(item)) }
      end

      private

      # String the digest is computed over.
      #
      # Hash-like items (responding to both +to_h+ and +keys+) are digested
      # through their attribute hash. Calling +to_s+ directly on an object that
      # does not override it yields Object#to_s, which embeds the object's
      # address, so equal content would get a different key on every run.
      # Plain hashes produce the same string as before.
      def digest_source(item)
        hash_like = item.respond_to?(:to_h) && item.respond_to?(:keys)
        (hash_like ? item.to_h : item).to_s
      end
    end
  end
end