gallicagram 1.0.1 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/gallicagram.rb +52 -24
  3. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: dc880e4aa64ab9c8cb35d2dbb9f2fadb7d5280195470ccbb2b9cb76bfffb4501
4
- data.tar.gz: 9c5cf57821a16e16ce13b33c6ebeb1fde5386a16d78f28250bf03be346d981bf
3
+ metadata.gz: 550bf9ebdcf99a41e01e157ba09dc5e864ef4fc531036b4afdd0501a4d5c4ad3
4
+ data.tar.gz: 6a18e0c6719baef37699c5035c7abc53008cca58040368fdbcf56ccb687de9f1
5
5
  SHA512:
6
- metadata.gz: 3bf34772e3c3828fb08da67e83d8c71f2042ae9eb7edfd5d18af0311e991c172c4561242c72eaef878528af896c2fbbbc5849578d6143c277d5800ede99fded6
7
- data.tar.gz: 62d37214f2d572629b45352e93d675a9276ed9c98e95117dd30e7462dd0a70d8dfe3d448e42dbb94fb987bee32afd71ab5aa6b96b35de2b6407f8c9eab1eb565
6
+ metadata.gz: bbdb13303e02606ac3eea5911787815e5f52a0081adc59f5a7c96cfcfa962ca4edfb49dc2a93c020f973771bd18facd8b10812cb9dddad7b24f0404d9ae82c74
7
+ data.tar.gz: 428ddb548775277156091035b04f9d72f3b4549ea39a9842bbc7294a2a93e081e6ce65db402dba921dd119235e4b3a3d9f8e3e9aeab415907a9876d243a91af2
data/lib/gallicagram.rb CHANGED
@@ -2,44 +2,72 @@ class Gallicagram
2
2
 
3
3
  require 'open-uri'
4
4
  require 'cgi'
5
+ require 'csv'
5
6
 
6
7
  # query = string or array of strings
7
8
  # corpus = ["lemonde","livres","presse"]
8
9
  # resolution = ["year", "month"]
9
- def self.search(query,corpus="lemonde",start_date="1900",end_date="2000",resolution="month",sum=false)
10
+ def self.search(query,corpus="lemonde",start_date="1900",end_date="2000",resolution="month",sum=false)
10
11
 
11
- query = format_query(query, sum)
12
+ query = [query] unless query.kind_of?(Array)
13
+ output = ""
14
+ query.each_with_index do |word, index|
15
+ query = format_query(word, sum)
16
+ response = call_api(word, corpus, start_date, end_date)
17
+ unless index == 0
18
+ response = response.gsub("n,gram,annee,mois,jour,total", "\n").strip
19
+ end
20
+ output << response
21
+ end
12
22
 
13
- # We search through the Gallicagram API
14
- url = "https://shiny.ens-paris-saclay.fr/guni/corpus=#{corpus}_#{query}_from=#{start_date}_to=#{end_date}"
23
+ if corpus == "livres" && resolution == "month"
24
+ resolution = "year"
25
+ end
15
26
 
16
- response = URI.open(url)
27
+ data = group_by_resolution(output, resolution)
17
28
 
18
- return response.read # to read the output: CSV.parse(response.read)
19
- end
29
+ return data
30
+ end
20
31
  end
21
32
 
22
33
  private
23
34
 
24
35
  def format_query(query, sum)
25
- if query.kind_of?(Array)
26
- query_string = ""
27
- query.each_with_index do |word, i|
28
- word = CGI.escape(word.strip.gsub(" ", "%20"))
29
- if i == 0
30
- query_string = query_string + word
31
- else
32
- if sum == true
33
- query_string = query_string + "+" + word
34
- else
35
- query_string = query_string + "&" + word
36
- end
37
- end
38
- end
39
- else
40
- query_string = CGI.escape(query.strip.gsub(" ", "%20"))
41
- end
36
+
37
+ query_string = CGI.escape(query.strip.gsub(" ", "%20"))
42
38
 
43
39
  return query_string
44
40
 
41
+ end
42
+
43
+ def call_api(query, corpus, start_date, end_date)
44
+ # We search through the Gallicagram API
45
+ url = "https://shiny.ens-paris-saclay.fr/guni/query?corpus=#{corpus}&mot=#{CGI.escape(query)}&from=#{start_date}&to=#{end_date}"
46
+
47
+ response = URI.open(url)
48
+
49
+ return response.read
50
+ end
51
+
52
+ def group_by_resolution(data, resolution)
53
+ csv_parsing = CSV.parse(data, :headers => true)
54
+ clean_data_array = []
55
+ case resolution
56
+ when "year"
57
+ csv_parsing.group_by { |word| [word["annee"], word["gram"]] }.each do |cp|
58
+ sum = cp[1].sum { |word| word["total"].to_i }
59
+ new_row = cp[0]
60
+ new_row << sum
61
+ clean_data_array << new_row
62
+ end
63
+ when "month"
64
+ csv_parsing.group_by { |word| [word["annee"], word["mois"], word["gram"]] }.each do |cp|
65
+ sum = cp[1].sum { |word| word["total"].to_i }
66
+ new_row = cp[0]
67
+ new_row << sum
68
+ clean_data_array << new_row
69
+ end
70
+ end
71
+
72
+ return clean_data_array
45
73
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gallicagram
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 1.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nicolas Le Roux
@@ -64,7 +64,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
64
64
  - !ruby/object:Gem::Version
65
65
  version: '0'
66
66
  requirements: []
67
- rubygems_version: 3.3.7
67
+ rubygems_version: 3.4.12
68
68
  signing_key:
69
69
  specification_version: 3
70
70
  summary: A ruby gem to manage Gallicagram data