wiki_top_page_views 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. checksums.yaml +7 -0
  2. data/bin/top_page +35 -0
  3. data/lib/wiki_top_page_views.rb +45 -0
  4. metadata +89 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 43816808994eb7d2a80df7f9fc82d2627bb52123
4
+ data.tar.gz: 6332c7daed022a743bd9d20108c67bee69336a57
5
+ SHA512:
6
+ metadata.gz: 5a8ef1914baa45270775d298d1eb2b83fc14458064951bfdbf5666ee51d644aaaa7805c8b87fe708d3f425d6e235597517ce6a9ae8801c1572b49352ec44e9fb
7
+ data.tar.gz: 6550324ac6e9b30a1bddefcc9fa6e48abfee19e6a056e22bc6f079199912fce3b71c222a82330ba92d5126d0357cbd261967ec2d23340a7edb2cb7274c8418fd
@@ -0,0 +1,35 @@
1
#!/usr/bin/env ruby
# Command-line entry point: reads a page-count file and prints the most
# viewed topics for one language.
#
# Options:
#   -f, --file FILE          file to read (required)
#   -l, --language LANGUAGE  language prefix to keep (defaults to "en")
#   -c, --count COUNT        how many topics to print (defaults to 10)
#   -h, --help               print usage and exit
require_relative "../lib/wiki_top_page_views"
require 'optparse'

options = {}

opt_parser = OptionParser.new do |opt|
  opt.on("-f", "--file FILE", "the file name") do |file|
    options[:file] = file
  end

  opt.on("-l", "--language LANGUAGE", "the two letter abbreviation for the language") do |lang|
    options[:lang] = lang
  end

  # Passing Integer makes OptionParser convert the argument and reject
  # non-numeric values, instead of handing a String to Array#first later on.
  opt.on("-c", "--count COUNT", Integer, "the count of top subjects") do |count|
    options[:count] = count
  end

  opt.on("-h", "--help", "help") do
    # Use the block parameter; it is the parser being built here.
    puts opt
    exit
  end
end

begin
  opt_parser.parse!
rescue OptionParser::ParseError => e
  # Show the problem plus usage rather than a raw backtrace.
  abort "#{e.message}\n#{opt_parser}"
end

file = options[:file]
# Without a file there is nothing to read; fail with usage instead of a TypeError.
abort "missing required option: --file FILE\n#{opt_parser}" unless file
lang = options[:lang] || "en"
count = options[:count] || 10

# Find reads its limit from the :top_num key, so the count must be passed
# under that name to take effect.
f = Find.new({file: file, lang: lang, top_num: count})
p f.top_topics
35
+
@@ -0,0 +1,45 @@
1
+
2
# Finds the most-visited topics for one language in a page-count file.
#
# Each line of the file is split on whitespace and read as:
# project/language code, title, visit count, and one trailing field.
# NOTE(review): presumably this is the Wikimedia pagecounts-raw layout
# (project, title, requests, bytes) - confirm against the dump documentation.
class Find
  # Titles whose first word starts with "File" or "Special" are skipped.
  EXCLUDED_TITLE = /\AFile|\ASpecial/i

  # @param opts [Hash] options
  #   :lang    - language prefix to keep (default "en")
  #   :top_num - number of topics to return (default 10); :count is accepted
  #              as an alias, and String values such as "5" are converted
  #   :file    - path of the file to read
  # @raise [ArgumentError] if the limit is not an integer or is negative
  def initialize(opts = {})
    @lang = opts[:lang] || "en"
    @top_num = Integer(opts[:top_num] || opts[:count] || 10)
    raise ArgumentError, "top_num must not be negative" if @top_num < 0
    @file = opts[:file]
  end

  # Reads every line of the file and re-encodes it from binary to UTF-8,
  # replacing each byte that cannot be converted with ".".
  #
  # @return [Array<String>] the lines of the file
  def open_file
    puts "opening file: #{@file}"
    # File.readlines never interprets a leading "|" in the path as a command
    # to run, unlike IO.readlines.
    File.readlines(@file).map! do |s|
      s.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '.')
    end
  end

  # Keeps the lines that start with the language prefix (case-insensitive),
  # drops File/Special titles, and normalises every row to
  # [project, title, count, last_field].
  #
  # A title that was split across several fields because it contained
  # whitespace is joined back together (without separators); the count and
  # the last field are always taken from the end of the line. Rows with
  # fewer than three fields carry no count and are dropped.
  #
  # @return [Array<Array<String>>] rows with four or fewer fields first,
  #   followed by the rows whose title had to be re-joined
  def find_lang
    lines = open_file
    puts "finding language #{@lang}"
    # Escape the prefix so characters like "." are matched literally.
    prefix = /\A#{Regexp.escape(@lang.to_s)}/i

    rows = lines.select { |line| prefix =~ line }.map { |line| line.split(" ") }
    rows.select! { |fields| fields.size >= 3 && fields[1] !~ EXCLUDED_TITLE }

    plain, spaced = rows.partition { |fields| fields.size <= 4 }
    plain.concat(spaced.map { |fields| [fields[0], fields[1..-3].join, fields[-2], fields[-1]] })
  end

  # @return [Array<Array<String>>] the rows ordered by visit count, highest first
  def sort_array
    rows = find_lang
    puts "sorting file"
    rows.sort_by { |row| -row[2].to_i }
  end

  # @return [Array<String>] up to top_num strings of the form
  #   "Topic: <title>, Visits: <count>", most visited first
  def top_topics
    ranked = sort_array
    puts "getting top #{@top_num}"
    ranked.first(@top_num).map { |row| "Topic: #{row[1]}, Visits: #{row[2]}" }
  end
end
42
+
43
+
44
+
45
+
metadata ADDED
@@ -0,0 +1,89 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: wiki_top_page_views
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Katie Atrops
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-12-10 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rspec
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: 3.1.0
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: 3.1.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: 10.4.2
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: 10.4.2
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec-encoding-matchers
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: 0.1.0
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: 0.1.0
55
+ description: This gem parses data from files at http://dumps.wikimedia.org/other/pagecounts-raw/
56
+ to get the top viewed pages
57
+ email: katrops@gmail.com
58
+ executables:
59
+ - top_page
60
+ extensions: []
61
+ extra_rdoc_files: []
62
+ files:
63
+ - bin/top_page
64
+ - lib/wiki_top_page_views.rb
65
+ homepage: http://rubygems.org/gems/wiki_top_page_views
66
+ licenses:
67
+ - MIT
68
+ metadata: {}
69
+ post_install_message:
70
+ rdoc_options: []
71
+ require_paths:
72
+ - lib
73
+ required_ruby_version: !ruby/object:Gem::Requirement
74
+ requirements:
75
+ - - '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ required_rubygems_version: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ requirements: []
84
+ rubyforge_project:
85
+ rubygems_version: 2.2.2
86
+ signing_key:
87
+ specification_version: 4
88
+ summary: Returns the top viewed wikipedia pages
89
+ test_files: []