jamnagar 1.3.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. checksums.yaml +7 -0
  2. data/.gitignore +17 -0
  3. data/.rspec +3 -0
  4. data/.vagrant/machines/default/virtualbox/action_provision +1 -0
  5. data/.vagrant/machines/default/virtualbox/action_set_name +1 -0
  6. data/.vagrant/machines/default/virtualbox/id +1 -0
  7. data/.vagrant/machines/default/virtualbox/index_uuid +1 -0
  8. data/.vagrant/machines/default/virtualbox/private_key +27 -0
  9. data/.vagrant/machines/default/virtualbox/synced_folders +1 -0
  10. data/Gemfile +4 -0
  11. data/Gemfile.lock +64 -0
  12. data/LICENSE.txt +22 -0
  13. data/README.md +31 -0
  14. data/Rakefile +2 -0
  15. data/UTF-8-test.txt +0 -0
  16. data/Vagrantfile +67 -0
  17. data/blinky_tests +3 -0
  18. data/bootstrap.sh +36 -0
  19. data/jamnagar.gemspec +33 -0
  20. data/lib/jamnagar/adapters/adapter.rb +18 -0
  21. data/lib/jamnagar/adapters/file_system_adapter.rb +9 -0
  22. data/lib/jamnagar/adapters/mongo_adapter.rb +54 -0
  23. data/lib/jamnagar/adapters/persistent_store_adapter.rb +9 -0
  24. data/lib/jamnagar/initializers/mongo.rb +6 -0
  25. data/lib/jamnagar/materials/item.rb +15 -0
  26. data/lib/jamnagar/materials/ore.rb +29 -0
  27. data/lib/jamnagar/producers/producer.rb +5 -0
  28. data/lib/jamnagar/producers/rss_producer.rb +6 -0
  29. data/lib/jamnagar/producers/twitter_producer.rb +6 -0
  30. data/lib/jamnagar/refineries/content_refinery.rb +46 -0
  31. data/lib/jamnagar/refiners/contributor_detail.rb +17 -0
  32. data/lib/jamnagar/refiners/duplicate_detection.rb +23 -0
  33. data/lib/jamnagar/refiners/meta_data_extraction.rb +15 -0
  34. data/lib/jamnagar/refiners/popularity_incrementation.rb +22 -0
  35. data/lib/jamnagar/refiners/primary_key_generation.rb +20 -0
  36. data/lib/jamnagar/refiners/refiner.rb +22 -0
  37. data/lib/jamnagar/refiners/source_detail.rb +17 -0
  38. data/lib/jamnagar/refiners/url_expansion.rb +23 -0
  39. data/lib/jamnagar/refiners/utm_stripping.rb +13 -0
  40. data/lib/jamnagar/storage/basic_store.rb +34 -0
  41. data/lib/jamnagar/storage/contributor_store.rb +35 -0
  42. data/lib/jamnagar/storage/in_memory_cache.rb +17 -0
  43. data/lib/jamnagar/storage/item_store.rb +21 -0
  44. data/lib/jamnagar/storage/refined_item_store.rb +17 -0
  45. data/lib/jamnagar/storage/source_store.rb +35 -0
  46. data/lib/jamnagar/utilities/duplicate_detector.rb +12 -0
  47. data/lib/jamnagar/utilities/meta_data_extractor.rb +16 -0
  48. data/lib/jamnagar/utilities/popularity_incrementor.rb +20 -0
  49. data/lib/jamnagar/utilities/runner.rb +12 -0
  50. data/lib/jamnagar/utilities/silent_logger.rb +17 -0
  51. data/lib/jamnagar/utilities/url_expander.rb +47 -0
  52. data/lib/jamnagar/utilities/utm_stripper.rb +21 -0
  53. data/lib/jamnagar/verifiers/uniqueness_verifier.rb +16 -0
  54. data/lib/jamnagar/verifiers/verifier.rb +13 -0
  55. data/lib/jamnagar/version.rb +3 -0
  56. data/lib/jamnagar.rb +7 -0
  57. data/run.rb +49 -0
  58. data/sentinal +10 -0
  59. data/spec/basic_store_spec.rb +53 -0
  60. data/spec/content_refinement_spec.rb +74 -0
  61. data/spec/contributor_detail_refinment_spec.rb +26 -0
  62. data/spec/contributor_store_spec.rb +31 -0
  63. data/spec/duplicate_detector_spec.rb +26 -0
  64. data/spec/helpers.rb +92 -0
  65. data/spec/item_spec.rb +9 -0
  66. data/spec/item_store_spec.rb +18 -0
  67. data/spec/mongo_adapter_spec.rb +18 -0
  68. data/spec/popularity_incrementor_spec.rb +23 -0
  69. data/spec/producers_spec.rb +9 -0
  70. data/spec/refined_item_store_spec.rb +29 -0
  71. data/spec/refinements_spec.rb +118 -0
  72. data/spec/runner_spec.rb +8 -0
  73. data/spec/scenarios_spec.rb +4 -0
  74. data/spec/source_detail_refinment_spec.rb +24 -0
  75. data/spec/source_store_spec.rb +31 -0
  76. data/spec/spec_helper.rb +98 -0
  77. data/spec/url_expander_spec.rb +46 -0
  78. data/spec/utm_stripper_spec.rb +31 -0
  79. data/spec/utm_stripping_spec.rb +5 -0
  80. data/spec/verifications_spec.rb +22 -0
  81. data/tracer.rb +61 -0
  82. data/tweet_stream.json +1 -0
  83. metadata +288 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: bd80d453e64fa92696458a499a24b41c4f23b034
4
+ data.tar.gz: eb46c5b99a58c17b36ccdeac7a267fa5ecc714bc
5
+ SHA512:
6
+ metadata.gz: d5c18347d0c3fb399e735e94c6295d80acfa0487c189e755a67d71ecbb4fd830c8dca21fde7a292d66a465612609f246d2d802f7e2d8da8f3de1eefa7df3ee41
7
+ data.tar.gz: 07efa6b5e57d45f0b274b7c920426c0eb7b7c383449a458423cc7a4b159d83843c573aa3589610bbca7105671bf9d3f33b42e2cdab240578964fc3f7192a742c
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ .vagrant
3
+ coverage
4
+ /.bundle/
5
+ /.yardoc
6
+ /Gemfile.lock
7
+ /_yardoc/
8
+ /coverage/
9
+ /doc/
10
+ /pkg/
11
+ /spec/reports/
12
+ /tmp/
13
+ *.bundle
14
+ *.so
15
+ *.o
16
+ *.a
17
+ mkmf.log
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --color
2
+ --require spec_helper
3
+ -f p
data/.vagrant/machines/default/virtualbox/action_provision ADDED
@@ -0,0 +1 @@
1
+ 1.5:56a91893-8c48-4bb0-bbe2-bb5c6a71458e
data/.vagrant/machines/default/virtualbox/action_set_name ADDED
@@ -0,0 +1 @@
1
+ 1422936767
data/.vagrant/machines/default/virtualbox/id ADDED
@@ -0,0 +1 @@
1
+ 56a91893-8c48-4bb0-bbe2-bb5c6a71458e
data/.vagrant/machines/default/virtualbox/index_uuid ADDED
@@ -0,0 +1 @@
1
+ cd630fe31aa041bc9a38f3df8739760c
data/.vagrant/machines/default/virtualbox/private_key ADDED
@@ -0,0 +1,27 @@
1
+ -----BEGIN RSA PRIVATE KEY-----
2
+ MIIEpAIBAAKCAQEAxYZos0iMqoP7EUagdjF9RKZCxxQcIQul5IpJ9r47icgMbnux
3
+ 5cPK4l44JadfO3YUWFvGQ1LOGVYGvL/F0E+m1JSWAjk1+9lLjdAwIMYRgrCmGRXp
4
+ 8CWN9Ph/EMz60iLPfWYb1IflCfb4lBJVQIN3RL/Pif0mqhMg3wIou/kGHrAUHh5y
5
+ cpqMk4RCTvyLb8BcijTDgvK4GTV7AFDttIHAfSPUmVSBn5zhw4mpQqtPCeGA/aRP
6
+ qCKgpwXCzzXqY8vi24M2E2x3kM8GvLIxISSK+n+CuLcw2zVoDWmN2HnLB5parOWF
7
+ Bsc+Xf+fXFeQkPN3e3rnUQLrUNqHNsYamKaGpQIDAQABAoIBAF86icfFtmuO7cBK
8
+ eJoDCg3Kym8INveHkhc4xKf81t96XuP1JrNLTckM+6zLoJIeP3jJ5jAW/94fJ2hg
9
+ XJItlJTwfdHcmzYscMySDmH+m/qSB8IYWoy4zlPIahpDeLlOpxIh5FD2o2nGc9mP
10
+ bSKJPzsZ7ojsT3O3fSWG+PSG/nhUOdHG9luiuHToe9howKsvKkf4S11S9XXPx6gI
11
+ LIkdphcbykqMHhCX3rSNu9pblc0WunuQ7f4kHSQtmWfbxnzFPTraNk3+uban793u
12
+ ypBjN/el7/RtNHs0psoccv4YeKGGZYtYFh2Mq9w3j+1lA+rCRtWI2/jLMr+KZz1W
13
+ XXaEvwECgYEA9zbOO6Hei9l5o8UeZlWVGugSQbC+WIMNNSzpjFHK6UNRe7dvYZa0
14
+ /oZEir1EIbJDMw9Rn723zjVUuVDyykAur0fL6v+0OWuLnKMh7Icokv/41ZOtQCtD
15
+ zf4XPu+vEYt0LTYuiw9e8QB2U0cc7gDaVxh31+FNGjlBj1yspG2+qZECgYEAzIuG
16
+ NOoiakEEnhPcc2+JOyiUXav5sLNQxbbBaJHqq17AvNiCXpQm/nWojBiDRkq3GdSx
17
+ Lf6wN0RDnGRj30ixPx0q88Pa7ETRBaYRcmb9fTCBGu2DNTq165yAg8fNv7lH27a1
18
+ qxG7xtwyLiEM/gX8I8qPzRTXdhCnE3d3IkDL4dUCgYEAlIgJQSrwW3Y+73bv3Oxu
19
+ ucvVrEJVGzkiJmDlsbkzARPBUWPVN/0koghqBjHRTa2dUoBRunhyhyLj1LQeLAaE
20
+ BixNCNS5pAZJy8L4DvTmG+xPxYMFBRj5lDqmHNpFhXUUSFXVgRerI4HzlfWUHCvH
21
+ A61f1AU6HhdonIpB8Ek/1QECgYArB7MdEKBa0ADDrJP06OLqwhesORCXHrIAVfkM
22
+ IZ5HnPBZCgcrFGm7oE7r4gBD2lX4pij77xdtGPxMO0ZTY7x99YnirDIzACsk0BHd
23
+ ilMLchfG7W5r1UUOIudaUb8z9Stryl0fduSU6h/YWnWcPvATK3ri/t5w5QHw83Hp
24
+ j84UXQKBgQD1rsAR0KqYwquC+LE88pmb+aMOeHp596SseJhZIvH5uzcJVkCapdGY
25
+ 6ow2v7ctNIVsZnollanX/faaIlP1urAh335az3jykUZ935+gZyUytojID+fGWAXK
26
+ eDCFZ2pkqwGiYNdqbMY0JeKk2sGoSiNRuDfvvmbNxolknwSzjIZ4CQ==
27
+ -----END RSA PRIVATE KEY-----
data/.vagrant/machines/default/virtualbox/synced_folders ADDED
@@ -0,0 +1 @@
1
+ {"virtualbox":{"/vagrant":{"guestpath":"/vagrant","hostpath":"/Users/clayton/Projects/jamnagar","disabled":false}}}
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in jamnagar.gemspec
4
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,64 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ jamnagar (1.3.7)
5
+ httparty (~> 0.13)
6
+ moped (~> 2.0)
7
+ multi_json (~> 1.10)
8
+ nokogiri (~> 1.6)
9
+ require_all (~> 1.3)
10
+
11
+ GEM
12
+ remote: https://rubygems.org/
13
+ specs:
14
+ blinky-tape-test-status (1.1.3)
15
+ serialport
16
+ bson (2.3.0)
17
+ connection_pool (2.1.2)
18
+ diff-lcs (1.2.5)
19
+ docile (1.1.5)
20
+ httparty (0.13.3)
21
+ json (~> 1.8)
22
+ multi_xml (>= 0.5.2)
23
+ json (1.8.2)
24
+ mini_portile (0.6.2)
25
+ moped (2.0.4)
26
+ bson (~> 2.2)
27
+ connection_pool (~> 2.0)
28
+ optionable (~> 0.2.0)
29
+ multi_json (1.10.1)
30
+ multi_xml (0.5.5)
31
+ nokogiri (1.6.6.2)
32
+ mini_portile (~> 0.6.0)
33
+ optionable (0.2.0)
34
+ rake (10.4.2)
35
+ require_all (1.3.2)
36
+ rspec (3.1.0)
37
+ rspec-core (~> 3.1.0)
38
+ rspec-expectations (~> 3.1.0)
39
+ rspec-mocks (~> 3.1.0)
40
+ rspec-core (3.1.7)
41
+ rspec-support (~> 3.1.0)
42
+ rspec-expectations (3.1.2)
43
+ diff-lcs (>= 1.2.0, < 2.0)
44
+ rspec-support (~> 3.1.0)
45
+ rspec-mocks (3.1.3)
46
+ rspec-support (~> 3.1.0)
47
+ rspec-support (3.1.2)
48
+ serialport (1.3.1)
49
+ simplecov (0.9.1)
50
+ docile (~> 1.1.0)
51
+ multi_json (~> 1.0)
52
+ simplecov-html (~> 0.8.0)
53
+ simplecov-html (0.8.0)
54
+
55
+ PLATFORMS
56
+ ruby
57
+
58
+ DEPENDENCIES
59
+ blinky-tape-test-status (~> 1.1)
60
+ bundler (~> 1.7)
61
+ jamnagar!
62
+ rake (~> 10.0)
63
+ rspec (~> 3.1)
64
+ simplecov (~> 0.9)
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2015 Clayton Lengel-Zigich
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,31 @@
1
+ # Jamnagar
2
+
3
+ TODO: Write a gem description
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'jamnagar'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install jamnagar
20
+
21
+ ## Usage
22
+
23
+ TODO: Write usage instructions here
24
+
25
+ ## Contributing
26
+
27
+ 1. Fork it ( https://github.com/[my-github-username]/jamnagar/fork )
28
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
29
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
30
+ 4. Push to the branch (`git push origin my-new-feature`)
31
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+
data/UTF-8-test.txt ADDED
Binary file
data/Vagrantfile ADDED
@@ -0,0 +1,67 @@
1
+ # -*- mode: ruby -*-
2
+ # vi: set ft=ruby :
3
+
4
+ # All Vagrant configuration is done below. The "2" in Vagrant.configure
5
+ # configures the configuration version (we support older styles for
6
+ # backwards compatibility). Please don't change it unless you know what
7
+ # you're doing.
8
+ Vagrant.configure(2) do |config|
9
+ # The most common configuration options are documented and commented below.
10
+ # For a complete reference, please see the online documentation at
11
+ # https://docs.vagrantup.com.
12
+
13
+ # Every Vagrant development environment requires a box. You can search for
14
+ # boxes at https://atlas.hashicorp.com/search.
15
+ config.vm.box = "ubuntu/trusty64"
16
+
17
+ # Disable automatic box update checking. If you disable this, then
18
+ # boxes will only be checked for updates when the user runs
19
+ # `vagrant box outdated`. This is not recommended.
20
+ # config.vm.box_check_update = false
21
+
22
+ # Create a forwarded port mapping which allows access to a specific port
23
+ # within the machine from a port on the host machine. In the example below,
24
+ # accessing "localhost:8080" will access port 80 on the guest machine.
25
+ # config.vm.network "forwarded_port", guest: 80, host: 8080
26
+
27
+ # Create a private network, which allows host-only access to the machine
28
+ # using a specific IP.
29
+ # config.vm.network "private_network", ip: "192.168.33.10"
30
+
31
+ # Create a public network, which generally matched to bridged network.
32
+ # Bridged networks make the machine appear as another physical device on
33
+ # your network.
34
+ # config.vm.network "public_network"
35
+
36
+ # Share an additional folder to the guest VM. The first argument is
37
+ # the path on the host to the actual folder. The second argument is
38
+ # the path on the guest to mount the folder. And the optional third
39
+ # argument is a set of non-required options.
40
+ # config.vm.synced_folder "../data", "/vagrant_data"
41
+
42
+ # Provider-specific configuration so you can fine-tune various
43
+ # backing providers for Vagrant. These expose provider-specific options.
44
+ # Example for VirtualBox:
45
+ #
46
+ config.vm.provider "virtualbox" do |vb|
47
+ # Customize the amount of memory on the VM:
48
+ vb.memory = "2048"
49
+ end
50
+ #
51
+ # View the documentation for the provider you are using for more
52
+ # information on available options.
53
+
54
+ # Define a Vagrant Push strategy for pushing to Atlas. Other push strategies
55
+ # such as FTP and Heroku are also available. See the documentation at
56
+ # https://docs.vagrantup.com/v2/push/atlas.html for more information.
57
+ # config.push.define "atlas" do |push|
58
+ # push.app = "YOUR_ATLAS_USERNAME/YOUR_APPLICATION_NAME"
59
+ # end
60
+
61
+ # Enable provisioning with a shell script. Additional provisioners such as
62
+ # Puppet, Chef, Ansible, Salt, and Docker are also available. Please see the
63
+ # documentation for more information about their specific syntax and use.
64
+ config.vm.provision :shell, path: "bootstrap.sh"
65
+ end
66
+
67
+
data/blinky_tests ADDED
@@ -0,0 +1,3 @@
1
+ #!/bin/bash
2
+
3
+ blinky-tape-test-tool f && bundle exec rspec spec && blinky-tape-test-tool gs || blinky-tape-test-tool rs
data/bootstrap.sh ADDED
@@ -0,0 +1,36 @@
1
+ #!/usr/bin/env bash
2
+ sudo su
3
+
4
+ apt-get update
5
+ apt-get upgrade -y
6
+ apt-get install -y git-core curl zlib1g-dev build-essential libssl-dev libreadline-dev libyaml-dev libsqlite3-dev sqlite3 libxml2-dev libxslt1-dev libcurl4-openssl-dev python-software-properties apache2-mpm-worker apache2-threaded-dev libapr1-dev libaprutil1-dev
7
+
8
+ # Ruby
9
+ cd
10
+ wget http://ftp.ruby-lang.org/pub/ruby/2.1/ruby-2.1.5.tar.gz
11
+ tar -xzvf ruby-2.1.5.tar.gz
12
+ cd ruby-2.1.5/
13
+ ./configure
14
+ make
15
+ make install
16
+
17
+ # Gems
18
+ echo "gem: --no-ri --no-rdoc" > ~/.gemrc
19
+ gem update --system
20
+ gem install bundler
21
+
22
+ # # Git
23
+ git config --global color.ui true
24
+ git config --global user.name "Vagrant"
25
+ git config --global user.email "vagrant@localhost"
26
+ ssh-keygen -t rsa -C "vagrant@localhost"
27
+
28
+ # Node
29
+ add-apt-repository ppa:chris-lea/node.js
30
+ apt-get update
31
+ apt-get install nodejs
32
+
33
+ # # Apache
34
+ gem install passenger --no-ri --no-rdoc
35
+
36
+ passenger-install-apache2-module -a
data/jamnagar.gemspec ADDED
@@ -0,0 +1,33 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'jamnagar/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "jamnagar"
8
+ spec.version = Jamnagar::VERSION
9
+ spec.authors = ["Clayton Lengel-Zigich"]
10
+ spec.email = ["clayton@claytonlz.com"]
11
+ spec.summary = %q{An extensible content refinery.}
12
+ spec.description = %q{Jamnagar is a library for processing content using an petroleum refinement metaphor.}
13
+ spec.homepage = "https://github.com/clayton/jamnagar"
14
+ spec.license = "MIT"
15
+
16
+ spec.files = `git ls-files -z`.split("\x0")
17
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
+ spec.require_paths = ["lib"]
20
+
21
+ spec.add_development_dependency "bundler", "~> 1.7"
22
+ spec.add_development_dependency "rake", "~> 10.0"
23
+ spec.add_development_dependency "rspec", "~> 3.1"
24
+ spec.add_development_dependency "blinky-tape-test-status", "~> 1.1"
25
+ spec.add_development_dependency "simplecov", "~> 0.9"
26
+
27
+ spec.add_runtime_dependency 'moped', "~> 2.0"
28
+ spec.add_runtime_dependency 'require_all', "~> 1.3"
29
+ spec.add_runtime_dependency 'multi_json', "~> 1.10"
30
+ spec.add_runtime_dependency 'httparty', "~> 0.13"
31
+ spec.add_runtime_dependency 'nokogiri', "~> 1.6"
32
+
33
+ end
data/lib/jamnagar/adapters/adapter.rb ADDED
@@ -0,0 +1,18 @@
1
+ module Jamnagar
2
+ module Adapters
3
+ class InsertError < StandardError;end
4
+ class Adapter
5
+ def initialize(args=nil)
6
+
7
+ end
8
+
9
+ def store(key, value)
10
+ key
11
+ end
12
+
13
+ def get(key)
14
+ ""
15
+ end
16
+ end
17
+ end
18
+ end
data/lib/jamnagar/adapters/file_system_adapter.rb ADDED
@@ -0,0 +1,9 @@
1
+ module Jamnagar
2
+ module Adapters
3
+ class FileSystemAdapter < Adapter
4
+ def initialize(args=nil)
5
+
6
+ end
7
+ end
8
+ end
9
+ end
data/lib/jamnagar/adapters/mongo_adapter.rb ADDED
@@ -0,0 +1,54 @@
1
+ require 'moped'
2
+
3
+ module Jamnagar
4
+ module Adapters
5
+ class MongoAdapter < Adapter
6
+
7
+ def self.driver
8
+ @@driver ||= Moped::Session.new(["127.0.0.1:27017"])
9
+ end
10
+
11
+ def initialize(driver=nil, database="test", collection="catchall")
12
+ @driver = driver || Jamnagar::Adapters::MongoAdapter.driver
13
+ @driver.use database
14
+ @collection = collection
15
+ end
16
+
17
+ def store(key, value)
18
+ @driver.with(safe: { wtimeout: 5 }) do |_session|
19
+ _session[@collection.to_sym].insert(value)
20
+ end
21
+ end
22
+
23
+ def get(key)
24
+ @driver.with(safe: { wtimeout: 5 }) do |_session|
25
+ _session[@collection.to_sym].find("_id" => key).first
26
+ end
27
+ end
28
+
29
+ def find(params={})
30
+ @driver.with(safe: { wtimeout: 5 }) do |_session|
31
+ _session[@collection.to_sym].find(params)
32
+ end
33
+ end
34
+
35
+ def find_first(params={})
36
+ @driver.with(safe: { wtimeout: 5 }) do |_session|
37
+ _session[@collection.to_sym].find(params).limit(1).first
38
+ end
39
+ end
40
+
41
+ def update(query={}, updates={})
42
+ @driver.with(safe: { wtimeout: 5 }) do |_session|
43
+ _session[@collection.to_sym].find(query).update(updates)
44
+ end
45
+ end
46
+
47
+ def find_and_modify(params={})
48
+ @driver.with(safe: { wtimeout: 5 }) do |_session|
49
+ _session[@collection.to_sym].find(params[:query]).update(params[:update])
50
+ end
51
+ end
52
+ end
53
+ end
54
+ end
data/lib/jamnagar/adapters/persistent_store_adapter.rb ADDED
@@ -0,0 +1,9 @@
1
+ module Jamnagar
2
+ module Adapters
3
+ class PersistentStoreAdapter
4
+ def find(*args)
5
+
6
+ end
7
+ end
8
+ end
9
+ end
data/lib/jamnagar/initializers/mongo.rb ADDED
@@ -0,0 +1,6 @@
1
+ module Jamnagar
2
+ module Initializers
3
+ class Mongo
4
+ end
5
+ end
6
+ end
data/lib/jamnagar/materials/item.rb ADDED
@@ -0,0 +1,15 @@
1
+ require 'ostruct'
2
+
3
+ module Jamnagar
4
+ module Materials
5
+ class Item < Ore
6
+ def raw_contributor
7
+ to_h["raw"]["user"]
8
+ end
9
+ def raw_source
10
+ host = to_h['final_url_host'] || "nosource.jamnagar.co"
11
+ {"id" => host}
12
+ end
13
+ end
14
+ end
15
+ end
data/lib/jamnagar/materials/ore.rb ADDED
@@ -0,0 +1,29 @@
1
+ module Jamnagar
2
+ module Materials
3
+ class Ore
4
+ def initialize(args={})
5
+ @attributes = args
6
+ end
7
+
8
+ def merge_refinement(refinement={})
9
+ @attributes = @attributes.merge(refinement) if refinement
10
+ end
11
+
12
+ def to_h
13
+ @attributes.to_h
14
+ end
15
+
16
+ def [](key)
17
+ to_h[key]
18
+ end
19
+
20
+ def keys
21
+ to_h.keys
22
+ end
23
+
24
+ def to_json
25
+ JSON.dump(to_h)
26
+ end
27
+ end
28
+ end
29
+ end
data/lib/jamnagar/producers/producer.rb ADDED
@@ -0,0 +1,5 @@
1
+ module Jamnagar
2
+ module Producers
3
+
4
+ end
5
+ end
data/lib/jamnagar/producers/rss_producer.rb ADDED
@@ -0,0 +1,6 @@
1
+ module Jamnagar
2
+ module Producers
3
+ class RSSProducer
4
+ end
5
+ end
6
+ end
data/lib/jamnagar/producers/twitter_producer.rb ADDED
@@ -0,0 +1,6 @@
1
+ module Jamnagar
2
+ module Producers
3
+ class TwitterProducer
4
+ end
5
+ end
6
+ end
data/lib/jamnagar/refineries/content_refinery.rb ADDED
@@ -0,0 +1,46 @@
1
+ module Jamnagar
2
+ module Refineries
3
+ class ContentRefinery
4
+ def initialize(options={})
5
+ @items = options[:items] || []
6
+ @refiners = options[:refiners] || []
7
+ @verifiers = options[:verifiers] || []
8
+ @storage = options[:storage] || Storage::ItemStore.new
9
+ @logger = options[:logger] || Jamnagar::Utilities::SilentLogger.new
10
+ @runner = options[:runner] || Jamnagar::Utilities::Runner.new
11
+ end
12
+
13
+ def refine
14
+ @items.each_with_index do |item, index|
15
+ begin
16
+ @runner.run do
17
+ @storage.insert(verify(enrich(convert(item))))
18
+ end
19
+ rescue Jamnagar::Adapters::InsertError
20
+ @logger.error("Insert Error: _id => #{item['_id']}")
21
+ end
22
+ end
23
+ end
24
+
25
+ private
26
+ def enrich(item)
27
+ @refiners.each do |refiner|
28
+ item = refiner.refine(item)
29
+ end
30
+ item
31
+ end
32
+
33
+ def verify(item)
34
+ @verifiers.each do |verifier|
35
+ item = verifier.verify(item)
36
+ end
37
+ item
38
+ end
39
+
40
+ def convert(item)
41
+ return Jamnagar::Materials::Item.new(item) unless item.is_a?(Jamnagar::Materials::Item)
42
+ item
43
+ end
44
+ end
45
+ end
46
+ end
data/lib/jamnagar/refiners/contributor_detail.rb ADDED
@@ -0,0 +1,17 @@
1
+ module Jamnagar
2
+ module Refiners
3
+ class ContributorDetail < Refiner
4
+ def initialize(store: nil)
5
+ @store = store
6
+ end
7
+
8
+ def to_s
9
+ "Contributor Detail"
10
+ end
11
+
12
+ def refinement_result(item)
13
+ {"contributor" => @store.find_contributor(item, item.raw_contributor)}
14
+ end
15
+ end
16
+ end
17
+ end
data/lib/jamnagar/refiners/duplicate_detection.rb ADDED
@@ -0,0 +1,23 @@
1
+ module Jamnagar
2
+ module Refiners
3
+ class DuplicateDetection < Refiner
4
+ def initialize(detector=nil, store: nil)
5
+ @detector = detector || Utilities::DuplicateDetector.new(store)
6
+ end
7
+
8
+ def to_s
9
+ "Duplicate Detection"
10
+ end
11
+
12
+ def refine(item)
13
+ super item
14
+ end
15
+
16
+ def refinement_result(item)
17
+ result = @detector.detect(item["final_url"])
18
+ return {"duplicate" => false} unless result
19
+ {"duplicate" => true, "duplicate_of" => result["_id"]}
20
+ end
21
+ end
22
+ end
23
+ end
data/lib/jamnagar/refiners/meta_data_extraction.rb ADDED
@@ -0,0 +1,15 @@
1
+ module Jamnagar
2
+ module Refiners
3
+ module Twitter
4
+ class MetaDataExtraction < Refiner
5
+ def initialize(extractor=nil)
6
+ @extractor = extractor || Jamnagar::Utilities::MetaDataExtractor.new
7
+ end
8
+
9
+ def refinement_result(item)
10
+ @extractor.extract(item)
11
+ end
12
+ end
13
+ end
14
+ end
15
+ end
data/lib/jamnagar/refiners/popularity_incrementation.rb ADDED
@@ -0,0 +1,22 @@
1
+ module Jamnagar
2
+ module Refiners
3
+ class PopularityIncrementation < Refiner
4
+ def initialize(incrementor=nil, store: nil)
5
+ @incrementor = incrementor || Utilities::PopularityIncrementor.new(store)
6
+ end
7
+
8
+ def to_s
9
+ "Popularity Incrementor"
10
+ end
11
+
12
+ def refine(item)
13
+ super item
14
+ end
15
+
16
+ def refinement_result(item)
17
+ @incrementor.increment(item["duplicate_of"]) if item["duplicate"] == true
18
+ {"popularity" => 1}
19
+ end
20
+ end
21
+ end
22
+ end
data/lib/jamnagar/refiners/primary_key_generation.rb ADDED
@@ -0,0 +1,20 @@
1
+ require 'digest'
2
+
3
+ module Jamnagar
4
+ module Refiners
5
+ class PrimaryKeyGeneration < Refiner
6
+ def initialize(digester=nil)
7
+ @digester = digester || Digest::MD5
8
+ end
9
+
10
+ def to_s
11
+ "Primary Key Generation"
12
+ end
13
+
14
+ def refinement_result(item)
15
+ id = @digester.hexdigest(item.to_s)
16
+ {"_id" => id}
17
+ end
18
+ end
19
+ end
20
+ end