gallicagram 1.0.6 → 1.0.7
This diff shows the differences between the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/gallicagram.rb +19 -26
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fc701174f91b6d0af5092efa9944eccd1abdb6981fb9ac396240b62e4457cc8f
|
4
|
+
data.tar.gz: 027772ca247ea0be65812aed3607c2c705a07a424bfa2e5d21ad4eefc6854462
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5818bd7812626868cc83bfd9f7c251f5a40eb1a4850823528e676815f7780aeab820ef02c10d55f6086559af17555c9cf5a59abf98c9c376f4f7b11a9063afc1
|
7
|
+
data.tar.gz: eae5a2a87b17d7b293e408eb27c4ad9bda093fdcc818a07ac084bc5f49ad939077413d8493c4caf91dcb5b0aa8a6d3f4f49170a2a2110664750d4256e6a1add0
|
data/lib/gallicagram.rb
CHANGED
@@ -6,25 +6,27 @@ class Gallicagram
|
|
6
6
|
|
7
7
|
# query = string or array of strings
|
8
8
|
# corpus = ["lemonde","livres","presse"]
|
9
|
-
# resolution = ["
|
10
|
-
def self.search(query,corpus="lemonde",start_date="1900",end_date="2000",resolution="
|
9
|
+
# resolution = ["mois", "annee"]
|
10
|
+
def self.search(query,corpus="lemonde",start_date="1900",end_date="2000",resolution="annee",sum=false)
|
11
11
|
|
12
12
|
query = [query] unless query.kind_of?(Array)
|
13
|
-
|
13
|
+
data = ""
|
14
14
|
query.each_with_index do |word, index|
|
15
15
|
query = format_query(word, sum)
|
16
|
-
response = call_api(word, corpus, start_date, end_date)
|
16
|
+
response = call_api(word, corpus, start_date, end_date,resolution)
|
17
17
|
unless index == 0
|
18
18
|
response = response.gsub("n,gram,annee,mois,jour,total", "\n").strip
|
19
19
|
end
|
20
|
-
|
20
|
+
data << response
|
21
21
|
end
|
22
22
|
|
23
|
-
if corpus == "livres" && resolution == "
|
24
|
-
resolution = "
|
23
|
+
if corpus == "livres" && resolution == "mois"
|
24
|
+
resolution = "annee"
|
25
25
|
end
|
26
26
|
|
27
|
-
|
27
|
+
if resolution == "mois"
|
28
|
+
data = group_by_resolution(data)
|
29
|
+
end
|
28
30
|
|
29
31
|
return data
|
30
32
|
end
|
@@ -40,33 +42,24 @@ def format_query(query, sum)
|
|
40
42
|
|
41
43
|
end
|
42
44
|
|
43
|
-
def call_api(query, corpus, start_date, end_date)
|
45
|
+
def call_api(query, corpus, start_date, end_date, resolution)
|
44
46
|
# We search through the Gallicagram API
|
45
|
-
url = "https://shiny.ens-paris-saclay.fr/guni/query?corpus=#{corpus}&mot=#{CGI.escape(query)}&from=#{start_date}&to=#{end_date}"
|
47
|
+
url = "https://shiny.ens-paris-saclay.fr/guni/query?corpus=#{corpus}&mot=#{CGI.escape(query)}&from=#{start_date}&to=#{end_date}&resolution=#{resolution}"
|
46
48
|
|
47
49
|
response = URI.open(url)
|
48
50
|
|
49
51
|
return response.read
|
50
52
|
end
|
51
53
|
|
52
|
-
def group_by_resolution(data
|
54
|
+
def group_by_resolution(data)
|
53
55
|
csv_parsing = CSV.parse(data, :headers => true)
|
54
56
|
clean_data_array = []
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
clean_data_array << new_row
|
62
|
-
end
|
63
|
-
when "month"
|
64
|
-
csv_parsing.group_by { |word| [word["annee"], word["mois"], word["gram"]] }.each do |cp|
|
65
|
-
sum = cp[1].sum { |word| word["total"].to_i }
|
66
|
-
new_row = cp[0]
|
67
|
-
new_row << sum
|
68
|
-
clean_data_array << new_row
|
69
|
-
end
|
57
|
+
|
58
|
+
csv_parsing.group_by { |word| [word["annee"], word["mois"], word["gram"]] }.each do |cp|
|
59
|
+
sum = cp[1].sum { |word| word["total"].to_i }
|
60
|
+
new_row = cp[0]
|
61
|
+
new_row << sum
|
62
|
+
clean_data_array << new_row
|
70
63
|
end
|
71
64
|
|
72
65
|
return clean_data_array
|