juxtaparssionate 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ ZTY1Y2U2NGMzZjBmNGFhM2I2ZWE5M2VmMmZjOGJiNzI3YTY1M2Q0ZQ==
5
+ data.tar.gz: !binary |-
6
+ OTg1MTFjNGI2NDEyZDM2ZTU3OWFjNGY2YzhjN2Q3YTE1ZjM4ZjI1OQ==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ ZTNkODg3MGYyZGM0OGM3OTI3MzVkNGZmZTk5MTA5OTQzMjYxYmZjMmE5NzFm
10
+ OTEyMGRkOGViMTgwN2Q0YWIxMDhiNmM5ZjUzYmI2NjM1OTFjNzg5MTAwYjQ0
11
+ M2U1ZmFlN2NlZTU4MzAyMmJiNzZmZGQzNzc3ZWEyYzhkYmQ1Y2U=
12
+ data.tar.gz: !binary |-
13
+ ZGQ1YjdlMGE5MmUyNTA5MzVlOTcwZGFiMzVmMzRkMGZiNjFkZGY3NzY3YTNh
14
+ OGI1YjVlYjk0MmMzMDFkMjY4YzZmMjAwMDRjMTkzYWUxNjU5ZWZjN2E2YzA1
15
+ YzM0Y2UxNjlmMDg3NzRkMDhhNjRkM2VjMjc3ZTA4NThhY2E5NjU=
@@ -0,0 +1,5 @@
1
+ == README
2
+
3
+ {<img src="https://travis-ci.org/trosborn/passion-parse.svg?branch=master" alt="Build Status" />}[https://travis-ci.org/trosborn/passion-parse]
4
+
5
+ juxtaparssionate parses job postings and juxtaposes the results.
@@ -0,0 +1,43 @@
1
+ require 'sanitize'
2
+ require 'treat'
3
+
4
+ include Treat::Core::DSL
5
+
6
# Loads a job posting and derives simple text metrics from it.
#
# The class relies on the Treat DSL (`document`, `apply`) and on Sanitize,
# both of which are required at the top of this file.
class Parser
  attr_accessor :contents

  # Builds the Treat document for the posting stored at +file_path+.
  #
  # @param file_path [#to_s] location of the posting to load
  def initialize(file_path)
    @contents = document(file_path.to_s)
  end

  # Removes markup from the posting and collapses every run of whitespace
  # into a single space.
  #
  # @return [String] the posting text without tags
  def strip_tags
    Sanitize.fragment(@contents).split.join(' ')
  end

  # Runs the chunk/segment/tokenize/category pipeline on the posting and
  # collects the string form of every noun it reports.
  #
  # @return [Array<String>]
  def extract_nouns
    analysed = @contents.apply(:chunk, :segment, :tokenize, :category)
    analysed.nouns.map { |noun| noun.to_s }
  end

  # Tokenizes the tag-free text of the posting.
  #
  # @return [Array<String>] the string form of each element produced by the
  #   chunk/segment/tokenize pipeline
  def extract_words
    tokens = strip_tags.apply(:chunk, :segment, :tokenize)
    tokens.map { |token| token.to_s }
  end

  # Counts how often each element occurs in +array+.
  #
  # The result is also kept in @word_count, replacing any earlier count.
  #
  # @param array [#each] the words to count
  # @return [Hash] word => number of occurrences, in first-seen order
  def count_words(array)
    @word_count = {}
    array.each do |word|
      @word_count[word] = @word_count.fetch(word, 0) + 1
    end
    @word_count
  end

  # Orders the entries of +hash+ by ascending value.
  #
  # sort_by is not stable, so entries with equal values are additionally
  # ordered by the string form of their key to keep the result deterministic.
  #
  # @param hash [Hash] e.g. the result of #count_words
  # @return [Array<Array>] [key, value] pairs, smallest value first
  def order_hash(hash)
    hash.sort_by { |key, value| [value, key.to_s] }
  end
end
@@ -0,0 +1,23 @@
1
+ require 'mechanize'
2
+ require 'nokogiri'
3
+
4
# Downloads job postings from We Work Remotely and stores each one as a text
# file inside the local "postings" directory.
class Scraper
  # Directory, relative to the current working directory, that receives the
  # saved postings.
  POSTINGS_DIR = 'postings'.freeze

  # Fetches the category listing, follows every link whose href matches
  # /jobs/ and saves the <title> markup plus the listing container markup of
  # each linked page.
  #
  # The index handed to #save is the link's position among all matched links,
  # so a skipped link leaves a gap in the file numbering.
  def we_work
    agent = Mechanize.new
    listing = agent.get('https://weworkremotely.com/categories/2/jobs')
    listing.links_with(:href => %r{/jobs/}).each_with_index do |link, index|
      next if link.href == 'https://weworkremotely.com/jobs/new'

      # Keep the listing page and the posting page in separate variables
      # instead of overwriting the page that is being iterated.
      doc = link.click.parser
      extracted_words = doc.css('title').to_s + doc.css('div.listing-container').to_s
      save(extracted_words, index)
    end
  end

  # Writes +doc+ to postings/job_posting<index>.txt, overwriting any existing
  # file of that name.
  #
  # The postings directory is created first when it is missing, so the very
  # first save no longer fails with Errno::ENOENT.
  #
  # @param doc [String] the text to store
  # @param index [Integer] number used in the file name
  # @return [Integer] whatever File#write returns (the number of bytes written)
  def save(doc, index)
    Dir.mkdir(POSTINGS_DIR) unless Dir.exist?(POSTINGS_DIR)
    File.open("#{POSTINGS_DIR}/job_posting#{index}.txt", 'w') do |file|
      file.write(doc)
    end
  end
end
@@ -0,0 +1 @@
1
# Release number of the gem; frozen so the constant cannot be mutated in place.
VERSION = '0.0.1'.freeze
@@ -0,0 +1 @@
1
+ <div><ul><li><p> ninja <em>10x</em> wanted for ninja douchenozzle team</p></li></ul></div>
@@ -0,0 +1,32 @@
1
+ require 'minitest/autorun'
2
+ require_relative '../lib/parser'
3
+
4
# Unit tests for Parser, driven by the fixture test/files/job_posting.txt.
class TestParser < Minitest::Test
  # Creates a parser over the fixture and the word counts shared by the
  # counting and ordering tests.
  def setup
    @parser = Parser.new('test/files/job_posting.txt')
    @counts = { '10x' => 1, 'ninja' => 3, 'douchenozzle' => 2 }
  end

  # The markup of the fixture is removed and whitespace is normalised.
  def test_that_HTML_tags_are_stripped
    expected = 'ninja 10x wanted for ninja douchenozzle team'
    assert_equal(expected, @parser.strip_tags)
  end

  # Every word of the tag-free fixture is returned, in document order.
  def test_that_words_are_extracted
    expected = %w[ninja 10x wanted for ninja douchenozzle team]
    assert_equal(expected, @parser.extract_words)
  end

  # Repeated words are tallied per distinct word.
  def test_that_words_are_counted
    words = %w[10x ninja douchenozzle ninja douchenozzle ninja]
    assert_equal(@counts, @parser.count_words(words))
  end

  # Counts come back as pairs sorted by ascending count.
  def test_that_the_hash_is_ordered
    expected = [['10x', 1], ['douchenozzle', 2], ['ninja', 3]]
    assert_equal(expected, @parser.order_hash(@counts))
  end

  # Only "team" is expected to be reported as a noun for the fixture.
  def test_that_nouns_are_extracted
    expected = ['team']
    assert_equal(expected, @parser.extract_nouns)
  end
end
32
+
metadata ADDED
@@ -0,0 +1,122 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: juxtaparssionate
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Thomas Osborn
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-04-13 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: treat
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ! '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ! '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rwordnet
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ! '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ! '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: nokogiri
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ! '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: mechanize
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ! '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: sanitize
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ! '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ! '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ description: juxtaparssionate parses and compares job postings from various popular
84
+ job boards. By comparing word frequency, sentence length, and parts-of-speech counts,
85
+ juxtaparssionate produces metrics loosely determining the quality and uniqueness
86
+ of a job posting
87
+ email: trosborn@gmail.com
88
+ executables: []
89
+ extensions: []
90
+ extra_rdoc_files: []
91
+ files:
92
+ - README.rdoc
93
+ - lib/parser.rb
94
+ - lib/scraper.rb
95
+ - lib/version.rb
96
+ - test/files/job_posting.txt
97
+ - test/test_parser.rb
98
+ homepage: http://www.github.com/trosborn/juxtaparssionate
99
+ licenses:
100
+ - MIT
101
+ metadata: {}
102
+ post_install_message:
103
+ rdoc_options: []
104
+ require_paths:
105
+ - lib
106
+ required_ruby_version: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ! '>='
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ required_rubygems_version: !ruby/object:Gem::Requirement
112
+ requirements:
113
+ - - ! '>='
114
+ - !ruby/object:Gem::Version
115
+ version: '0'
116
+ requirements: []
117
+ rubyforge_project:
118
+ rubygems_version: 2.4.5
119
+ signing_key:
120
+ specification_version: 4
121
+ summary: A tool for parsing and comparing job postings
122
+ test_files: []