wiki_top_page_views 0.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4) hide show
  1. checksums.yaml +7 -0
  2. data/bin/top_page +35 -0
  3. data/lib/wiki_top_page_views.rb +45 -0
  4. metadata +89 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 43816808994eb7d2a80df7f9fc82d2627bb52123
4
+ data.tar.gz: 6332c7daed022a743bd9d20108c67bee69336a57
5
+ SHA512:
6
+ metadata.gz: 5a8ef1914baa45270775d298d1eb2b83fc14458064951bfdbf5666ee51d644aaaa7805c8b87fe708d3f425d6e235597517ce6a9ae8801c1572b49352ec44e9fb
7
+ data.tar.gz: 6550324ac6e9b30a1bddefcc9fa6e48abfee19e6a056e22bc6f079199912fce3b71c222a82330ba92d5126d0357cbd261967ec2d23340a7edb2cb7274c8418fd
@@ -0,0 +1,35 @@
1
+ #!/usr/bin/env ruby
2
+ require_relative "../lib/wiki_top_page_views"
3
+ require 'optparse'
4
+
5
# Command-line entry point: parses the options, then prints the most viewed
# topics found in the given pagecounts file.
#
#   -f FILE      the file to read (required)
#   -l LANGUAGE  language code to keep (defaults to "en")
#   -c COUNT     how many topics to print (defaults to 10)
options = {}

opt_parser = OptionParser.new do |opt|
  opt.on("-f", "--file FILE", "the file name") do |file|
    options[:file] = file
  end

  opt.on("-l", "--language LANGUAGE", "the two letter abbreviation for the language") do |lang|
    options[:lang] = lang
  end

  # Passing Integer makes OptionParser reject non-numeric values and yield a
  # number rather than a String.
  opt.on("-c", "--count COUNT", Integer, "the count of top subjects") do |count|
    options[:count] = count
  end

  opt.on("-h", "--help", "help") do
    puts opt_parser
    exit
  end
end

opt_parser.parse!
file = options[:file]
lang = options[:lang] || "en"
count = options[:count] || 10

# Without a file there is nothing to read; show the usage text instead of
# failing later with a TypeError while opening nil.
abort(opt_parser.to_s) unless file

# Find reads the limit from :top_num, so the count has to be passed under that key.
f = Find.new({file: file, lang: lang, top_num: count})
p f.top_topics
35
+
@@ -0,0 +1,45 @@
1
+
2
# Finds the most viewed topics for one language in a page-count file.
#
# Every line of the file is split on whitespace and read as
# "<project> <title> <views> <bytes>". The project is the first field, the
# view count is the second-to-last field, and everything in between is the
# title.
class Find
  # @param opts [Hash] configuration
  # @option opts [String] :lang language code to keep (default "en")
  # @option opts [Integer, String] :top_num number of topics to return
  #   (default 10); :count is accepted as an alias
  # @option opts [String] :file path of the file to read
  # @raise [ArgumentError] if the limit cannot be converted to an Integer
  def initialize(opts = {})
    @lang = opts[:lang] || "en"
    # Command-line values arrive as Strings, and Array#first needs an Integer.
    @top_num = Integer(opts[:top_num] || opts[:count] || 10)
    @file = opts[:file]
  end

  # Reads the file and returns its lines as UTF-8 strings.
  #
  # Each line is treated as raw bytes, so every byte that has no UTF-8
  # equivalent is replaced by ".".
  #
  # @return [Array<String>]
  def open_file
    puts "opening file: #{@file}"
    # File.readlines never interprets a leading "|" in the path as a command
    # to run, which IO.readlines does.
    File.readlines(@file).map do |line|
      line.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '.')
    end
  end

  # Selects the rows of the requested language and normalises them to
  # [project, title, views, bytes].
  #
  # Rows whose title starts with "File" or "Special" are dropped, as are rows
  # that have no title or no view count.
  #
  # @return [Array<Array<String>>] rows with a one-token title first, followed
  #   by the rows whose title had to be re-assembled
  def find_lang
    lines = open_file
    puts "finding language #{@lang}"

    # The project code must be the language itself, optionally followed by a
    # ".suffix"; a bare prefix match would let "fr" also select "frp".
    project = /\A#{Regexp.escape(@lang)}(?:\.|\z)/i

    rows = lines.map(&:split).select do |fields|
      fields.length >= 3 && project =~ fields[0] && fields[1] !~ /\A(?:File|Special)/i
    end

    plain, spaced = rows.partition { |fields| fields.length <= 4 }
    # A title containing whitespace was split into several fields; glue the
    # fragments back together (without a separator) so that the view count is
    # always at index 2.
    spaced.map! { |fields| [fields[0], fields[1..-3].join, fields[-2], fields[-1]] }
    plain.concat(spaced)
  end

  # @return [Array<Array<String>>] the selected rows, most viewed first
  def sort_array
    rows = find_lang
    puts "sorting file"
    rows.sort_by { |row| -row[2].to_i }
  end

  # @return [Array<String>] one "Topic: <title>, Visits: <views>" entry for
  #   each of the most viewed rows, at most as many as the configured limit
  def top_topics
    ranked = sort_array
    puts "getting top #{@top_num}"
    ranked.first(@top_num).map { |row| "Topic: #{row[1]}, Visits: #{row[2]}" }
  end
end
42
+
43
+
44
+
45
+
metadata ADDED
@@ -0,0 +1,89 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: wiki_top_page_views
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Katie Atrops
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-12-10 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rspec
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: 3.1.0
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: 3.1.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: 10.4.2
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: 10.4.2
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec-encoding-matchers
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: 0.1.0
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: 0.1.0
55
+ description: This gem parses data from files at http://dumps.wikimedia.org/other/pagecounts-raw/
56
+ to get the top viewed pages
57
+ email: katrops@gmail.com
58
+ executables:
59
+ - top_page
60
+ extensions: []
61
+ extra_rdoc_files: []
62
+ files:
63
+ - bin/top_page
64
+ - lib/wiki_top_page_views.rb
65
+ homepage: http://rubygems.org/gems/wiki_top_page_views
66
+ licenses:
67
+ - MIT
68
+ metadata: {}
69
+ post_install_message:
70
+ rdoc_options: []
71
+ require_paths:
72
+ - lib
73
+ required_ruby_version: !ruby/object:Gem::Requirement
74
+ requirements:
75
+ - - '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ required_rubygems_version: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ requirements: []
84
+ rubyforge_project:
85
+ rubygems_version: 2.2.2
86
+ signing_key:
87
+ specification_version: 4
88
+ summary: Returns the top viewed wikipedia pages
89
+ test_files: []