google-ngrams 0.1.0__tar.gz → 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {google_ngrams-0.1.0/google_ngrams.egg-info → google_ngrams-0.1.1}/PKG-INFO +3 -2
- {google_ngrams-0.1.0 → google_ngrams-0.1.1}/README.rst +1 -1
- google_ngrams-0.1.1/_quarto/.gitignore +1 -0
- google_ngrams-0.1.1/_quarto/_extensions/machow/interlinks/.gitignore +3 -0
- google_ngrams-0.1.1/_quarto/_extensions/machow/interlinks/_extension.yml +7 -0
- google_ngrams-0.1.1/_quarto/_extensions/machow/interlinks/interlinks.lua +254 -0
- google_ngrams-0.1.1/_quarto/_quarto.yml +83 -0
- google_ngrams-0.1.1/_quarto/_site/get-started.html +801 -0
- google_ngrams-0.1.1/_quarto/_site/get-started_files/figure-html/cell-12-output-1.png +0 -0
- google_ngrams-0.1.1/_quarto/_site/get-started_files/figure-html/cell-9-output-1.png +0 -0
- google_ngrams-0.1.1/_quarto/_site/logo.png +0 -0
- google_ngrams-0.1.1/_quarto/_site/site_libs/bootstrap/bootstrap-icons.css +2078 -0
- google_ngrams-0.1.1/_quarto/_site/site_libs/bootstrap/bootstrap-icons.woff +0 -0
- google_ngrams-0.1.1/_quarto/_site/site_libs/bootstrap/bootstrap.min.css +12 -0
- google_ngrams-0.1.1/_quarto/_site/site_libs/bootstrap/bootstrap.min.js +7 -0
- google_ngrams-0.1.1/_quarto/_site/site_libs/clipboard/clipboard.min.js +7 -0
- google_ngrams-0.1.1/_quarto/_site/site_libs/quarto-html/anchor.min.js +9 -0
- google_ngrams-0.1.1/_quarto/_site/site_libs/quarto-html/popper.min.js +6 -0
- google_ngrams-0.1.1/_quarto/_site/site_libs/quarto-html/quarto-syntax-highlighting.css +203 -0
- google_ngrams-0.1.1/_quarto/_site/site_libs/quarto-html/quarto.js +899 -0
- google_ngrams-0.1.1/_quarto/_site/site_libs/quarto-html/tippy.css +1 -0
- google_ngrams-0.1.1/_quarto/_site/site_libs/quarto-html/tippy.umd.min.js +2 -0
- google_ngrams-0.1.1/_quarto/_site/site_libs/quarto-nav/quarto-nav.js +288 -0
- google_ngrams-0.1.1/_quarto/_site/site_libs/quarto-search/autocomplete.umd.js +3 -0
- google_ngrams-0.1.1/_quarto/_site/site_libs/quarto-search/fuse.min.js +9 -0
- google_ngrams-0.1.1/_quarto/_site/site_libs/quarto-search/quarto-search.js +1247 -0
- google_ngrams-0.1.1/_quarto/get-started.qmd +86 -0
- google_ngrams-0.1.1/_quarto/index.ipynb +132 -0
- google_ngrams-0.1.1/_quarto/index.qmd +71 -0
- google_ngrams-0.1.1/_quarto/logo.png +0 -0
- google_ngrams-0.1.1/_quarto/objects.json +1 -0
- google_ngrams-0.1.1/_quarto/reference/cluster_summary.qmd +13 -0
- google_ngrams-0.1.1/_quarto/reference/google_ngram.qmd +25 -0
- google_ngrams-0.1.1/_quarto/reference/index.qmd +22 -0
- google_ngrams-0.1.1/_quarto/reference/timeviz_barplot.qmd +33 -0
- google_ngrams-0.1.1/_quarto/reference/timeviz_scatterplot.qmd +32 -0
- google_ngrams-0.1.1/_quarto/reference/timeviz_screeplot.qmd +29 -0
- google_ngrams-0.1.1/_quarto/reference/timeviz_vnc.qmd +40 -0
- google_ngrams-0.1.1/_quarto/references.bib +22 -0
- {google_ngrams-0.1.0 → google_ngrams-0.1.1}/docs/google_ngrams.ipynb +22 -15
- {google_ngrams-0.1.0 → google_ngrams-0.1.1}/google_ngrams/vnc.py +38 -79
- {google_ngrams-0.1.0 → google_ngrams-0.1.1/google_ngrams.egg-info}/PKG-INFO +3 -2
- google_ngrams-0.1.1/google_ngrams.egg-info/SOURCES.txt +56 -0
- {google_ngrams-0.1.0 → google_ngrams-0.1.1}/google_ngrams.egg-info/requires.txt +1 -0
- {google_ngrams-0.1.0 → google_ngrams-0.1.1}/pyproject.toml +2 -1
- google_ngrams-0.1.0/_quarto/_quarto.yml +0 -104
- google_ngrams-0.1.0/google_ngrams.egg-info/SOURCES.txt +0 -20
- {google_ngrams-0.1.0 → google_ngrams-0.1.1}/.github/workflows/ci.yml +0 -0
- {google_ngrams-0.1.0 → google_ngrams-0.1.1}/.gitignore +0 -0
- {google_ngrams-0.1.0 → google_ngrams-0.1.1}/LICENSE +0 -0
- {google_ngrams-0.1.0 → google_ngrams-0.1.1}/docs/.gitkeep +0 -0
- {google_ngrams-0.1.0 → google_ngrams-0.1.1}/google_ngrams/__init__.py +0 -0
- {google_ngrams-0.1.0 → google_ngrams-0.1.1}/google_ngrams/data/__init__.py +0 -0
- {google_ngrams-0.1.0 → google_ngrams-0.1.1}/google_ngrams/data/googlebooks_eng_all_totalcounts_20120701.parquet +0 -0
- {google_ngrams-0.1.0 → google_ngrams-0.1.1}/google_ngrams/data/googlebooks_eng_gb_all_totalcounts_20120701.parquet +0 -0
- {google_ngrams-0.1.0 → google_ngrams-0.1.1}/google_ngrams/data/googlebooks_eng_us_all_totalcounts_20120701.parquet +0 -0
- {google_ngrams-0.1.0 → google_ngrams-0.1.1}/google_ngrams/ngrams.py +0 -0
- {google_ngrams-0.1.0 → google_ngrams-0.1.1}/google_ngrams.egg-info/dependency_links.txt +0 -0
- {google_ngrams-0.1.0 → google_ngrams-0.1.1}/google_ngrams.egg-info/top_level.txt +0 -0
- {google_ngrams-0.1.0 → google_ngrams-0.1.1}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: google_ngrams
|
3
|
-
Version: 0.1.0
|
3
|
+
Version: 0.1.1
|
4
4
|
Summary: Fetch and analyze Google Ngram data for specified word forms.
|
5
5
|
Author-email: David Brown <dwb2@andrew.cmu.edu>
|
6
6
|
Maintainer-email: David Brown <dwb2@andrew.cmu.edu>
|
@@ -20,6 +20,7 @@ Requires-Dist: importlib-resources>=6.5
|
|
20
20
|
Requires-Dist: matplotlib>=3.5
|
21
21
|
Requires-Dist: polars>=1.17
|
22
22
|
Requires-Dist: scipy>=1.15
|
23
|
+
Requires-Dist: statsmodels>=0.14
|
23
24
|
|
24
25
|
|
25
26
|
google_ngrams: Fetch and analyze Google Ngram data for specified word forms.
|
@@ -51,7 +52,7 @@ You can install the released version of google_ngrams from `PyPI <https://pypi.o
|
|
51
52
|
|
52
53
|
.. code-block:: install-google_ngrams
|
53
54
|
|
54
|
-
pip install
|
55
|
+
pip install google-ngrams
|
55
56
|
|
56
57
|
|
57
58
|
Usage
|
@@ -0,0 +1 @@
|
|
1
|
+
/.quarto/
|
@@ -0,0 +1,254 @@
|
|
1
|
+
--- Parse a plain-text inventory file into a Lua table.
-- @param filename path of the inventory file to read
-- @return nil when the file cannot be opened; otherwise a table with
--         `project`, `version` (taken from the "# Project:" / "# Version:"
--         header lines) and `items`, a list of parsed records.
-- Raises an error for any non-comment line that does not match the
-- expected record layout.
local function read_inv_text(filename)
    local handle = io.open(filename, "r")
    if not handle then
        return nil
    end
    local contents = handle:read("a")
    handle:close()

    -- One record per line: name, domain:role, priority, uri, display name.
    local record_pattern = table.concat({
        "^",
        "(.-)%s+",      -- name
        "([%S:]-):",    -- domain
        "([%S]+)%s+",   -- role
        "(%-?%d+)%s+",  -- priority
        "(%S*)%s+",     -- uri
        "(.-)\r?$",     -- dispname
    })

    local entries = {}
    for row in contents:gmatch("[^\r\n]+") do
        -- Lines starting with "#" are header/comment lines, not records.
        if row:sub(1, 1) ~= "#" then
            local name, domain, role, priority, uri, dispName = row:match(record_pattern)
            if name == nil then
                error("Error parsing line: " .. row)
            end

            -- priority is kept as the captured string, not converted to a number
            entries[#entries + 1] = {
                name = name,
                domain = domain,
                role = role,
                priority = priority,
                uri = uri,
                dispName = dispName
            }
        end
    end

    return {
        project = contents:match("# Project: (%S+)"),
        version = contents:match("# Version: (%S+)"),
        items = entries
    }
end
|
49
|
+
|
50
|
+
--- Read a file and decode its contents with quarto.json.decode.
-- @param filename path of the JSON file
-- @return the decoded value, or nil when the file cannot be opened
local function read_json(filename)
    local handle = io.open(filename, "r")
    if not handle then
        return nil
    end

    local contents = handle:read("a")
    handle:close()

    -- parentheses keep the result to a single return value
    return (quarto.json.decode(contents))
end
|
62
|
+
|
63
|
+
--- Load an inventory, preferring the plain-text form over JSON.
-- Tries `<base_name>.txt` first and falls back to `<base_name>.json`.
-- @param base_name inventory path without extension
-- @return the parsed inventory table, or nil when neither file can be read
local function read_inv_text_or_json(base_name)
    -- read_inv_text returns nil when the .txt file cannot be opened, so no
    -- separate open/close probe is needed. The result stays in a local
    -- (the previous version assigned to a global `json`).
    local inv = read_inv_text(base_name .. ".txt")
    if inv ~= nil then
        return inv
    end

    return read_json(base_name .. ".json")
end
|
76
|
+
|
77
|
+
local inventory = {}
|
78
|
+
|
79
|
+
--- Find the inventory item that a parsed cross-reference points to.
-- Scans every loaded inventory in `inventory`.
-- @param search_object table produced by build_search_object
-- @return the first matching item, or nil when nothing matches
--         (a warning is logged for zero or for multiple matches)
local function lookup(search_object)
    -- Decide whether one inventory entry satisfies the search.
    local function is_match(item)
        -- e.g. :external+<inv_name>:<domain>:<role>:`<name>`
        if item.inv_name and item.inv_name ~= search_object.inv_name then
            return false
        end
        if item.name ~= search_object.name then
            return false
        end
        if search_object.role and item.role ~= search_object.role then
            return false
        end
        if search_object.domain then
            return item.domain == search_object.domain
        end
        -- without an explicit domain only "py" entries are accepted
        return item.domain == "py"
    end

    local hits = {}
    for _, inv in ipairs(inventory) do
        for _, item in ipairs(inv.items) do
            if is_match(item) then
                hits[#hits + 1] = item
            end
        end
    end

    local count = #hits
    if count == 0 then
        quarto.log.warning("Found no matches for object:\n", search_object)
        return nil
    end
    if count > 1 then
        quarto.log.warning("Found multiple matches for " .. search_object.name .. ", using the first match.")
    end
    return hits[1]
end
|
124
|
+
|
125
|
+
--- Split a string on any character of `sep`.
-- `sep` is spliced into a pattern character class unescaped; it defaults
-- to whitespace ("%s"). Empty fields are dropped.
-- @param inputstr string to split
-- @param sep separator characters (optional)
-- @return list of the non-empty pieces
local function mysplit (inputstr, sep)
    local delimiter = (sep == nil) and "%s" or sep
    local pieces = {}
    for chunk in string.gmatch(inputstr, "([^" .. delimiter .. "]+)") do
        pieces[#pieces + 1] = chunk
    end
    return pieces
end
|
135
|
+
|
136
|
+
--- Map the short role alias "func" to "function".
-- Any other value is returned unchanged.
local function normalize_role(role)
    local aliases = { func = "function" }
    return aliases[role] or role
end
|
142
|
+
|
143
|
+
--- Parse a link target into a search table for lookup().
-- The name is expected between two "%60" markers (percent-encoded
-- backticks). Accepted forms:
--   `name`
--   :role:`name`
--   :domain:role:`name`
--   :external+inv:domain:role:`name`
-- A leading "~" on the name sets `shortened` and is stripped.
-- @param str the link target
-- @return search table, or an empty table (after a warning) when the
--         target cannot be parsed
local function build_search_object(str)
    local name_pattern = "%%60(.*)%%60"
    local search = {}

    if str:sub(1, 1) == ":" then
        local parts = mysplit(str, ":")
        local count = #parts
        if count < 2 or count > 4 then
            quarto.log.warning("couldn't parse this link: " .. str)
            return {}
        end

        if count == 4 then
            -- e.g. :external+inv:py:func:`my_func`
            search.external = true
            search.inv_name = parts[1]:match("external%+(.*)")
        end
        if count >= 3 then
            -- e.g. :py:func:`my_func`
            search.domain = parts[count - 2]
        end
        -- the last two parts are always role and name, e.g. :func:`my_func`
        search.role = normalize_role(parts[count - 1])
        search.name = parts[count]:match(name_pattern)
    else
        search.name = str:match(name_pattern)
    end

    if search.name == nil then
        quarto.log.warning("couldn't parse this link: " .. str)
        return {}
    end

    if search.name:sub(1, 1) == "~" then
        search.shortened = true
        search.name = search.name:sub(2)
    end
    return search
end
|
184
|
+
|
185
|
+
--- Build the fallback element for a link that could not be resolved:
-- the link's text rendered as inline code.
-- `search_object` and `replacement` are accepted but not used.
local function report_broken_link(link, search_object, replacement)
    -- TODO: how to unescape html elements like [?
    local label = pandoc.utils.stringify(link.content)
    return pandoc.Code(label)
end
|
189
|
+
|
190
|
+
--- Pandoc Link handler: resolve interlink targets against the loaded
-- inventories. Targets are recognised by the "%60" marker (a
-- percent-encoded backtick); any other link is returned untouched.
-- @param link the pandoc Link element
-- @return the link with its target (and, when it had no text, its content)
--         rewritten, or the result of report_broken_link when no
--         inventory item matches
function Link(link)
    -- do not process regular links ----
    if not link.target:match("%%60") then
        return link
    end

    -- lookup item ----
    local search = build_search_object(link.target)
    local item = lookup(search)

    -- determine replacement, used if no link text specified ----
    local original_text = pandoc.utils.stringify(link.content)
    local replacement = search.name
    if search.shortened then
        -- "~pkg.mod.name" displays only the last dotted component
        local t = mysplit(search.name, ".")
        replacement = t[#t]
    end

    -- set link text ----
    if original_text == "" and replacement ~= nil then
        link.content = pandoc.Code(replacement)
    end

    -- report broken links ----
    if item == nil then
        return report_broken_link(link, search)
    end

    -- A trailing "$" in the inventory uri stands for the object name.
    -- Use a function replacement so the name is inserted verbatim: in a
    -- replacement *string* "%" is magic and a name containing it would
    -- make gsub raise an error.
    link.target = (item.uri:gsub("%$$", function()
        return search.name
    end))

    return link
end
|
222
|
+
|
223
|
+
--- Prepend `prefix` to the uri of every item in an inventory, then
-- register the inventory in the `inventory` list.
-- @param json inventory table with an `items` list (modified in place)
-- @param prefix string put in front of each item's uri
local function fixup_json(json, prefix)
    for _, entry in ipairs(json.items) do
        entry.uri = prefix .. entry.uri
    end
    inventory[#inventory + 1] = json
end
|
229
|
+
|
230
|
+
-- Filter list returned to pandoc: the first table loads the inventories
-- from document metadata, the second rewrites links.
-- NOTE(review): relies on pandoc applying the listed filters in order, so
-- that inventories are loaded before Link runs — confirm against the
-- pandoc Lua filter documentation.
return {
    {
        Meta = function(meta)
            -- external inventories declared under `interlinks.sources`
            local sources = meta.interlinks and meta.interlinks.sources
            if sources then
                for source_name, source in pairs(sources) do
                    local inv = read_inv_text_or_json(
                        quarto.project.offset .. "/_inv/" .. source_name .. "_objects"
                    )
                    local url_prefix = pandoc.utils.stringify(source.url)
                    if inv ~= nil then
                        fixup_json(inv, url_prefix)
                    end
                end
            end

            -- the project's own inventory, linked relative to the site root
            local own = read_inv_text_or_json(quarto.project.offset .. "/objects")
            if own ~= nil then
                fixup_json(own, "/")
            end
        end
    },
    {
        Link = Link
    }
}
|
@@ -0,0 +1,83 @@
|
|
1
|
+
project:
|
2
|
+
type: website
|
3
|
+
output-dir: _site
|
4
|
+
|
5
|
+
website:
|
6
|
+
title: "google_ngrams"
|
7
|
+
description: "Fetch and analyze Google Ngram data for specified word forms."
|
8
|
+
page-navigation: true
|
9
|
+
favicon: "favicon.ico"
|
10
|
+
navbar:
|
11
|
+
background: light
|
12
|
+
pinned: true
|
13
|
+
logo: logo.png
|
14
|
+
left:
|
15
|
+
- text: "Get started"
|
16
|
+
file: get-started.qmd
|
17
|
+
- text: "TimeSeries"
|
18
|
+
file: time-series.qmd
|
19
|
+
- text: "Reference"
|
20
|
+
file: reference/index.qmd
|
21
|
+
- text: Learn more
|
22
|
+
menu:
|
23
|
+
- text: "VNC Clustering for R"
|
24
|
+
href: https://cran.r-project.org/web/packages/pseudobibeR/index.html
|
25
|
+
target: _blank
|
26
|
+
right:
|
27
|
+
- icon: github
|
28
|
+
href: https://github.com/browndw/google_ngrams
|
29
|
+
aria-label: google_ngrams on GitHub
|
30
|
+
sidebar:
|
31
|
+
style: "floating"
|
32
|
+
collapse-level: 1
|
33
|
+
contents:
|
34
|
+
- section: Fetching Data
|
35
|
+
contents:
|
36
|
+
- text: "`google_ngram`"
|
37
|
+
href: reference/google_ngram.qmd
|
38
|
+
- section: Time Series
|
39
|
+
contents:
|
40
|
+
- text: "`timeviz_barplot`"
|
41
|
+
href: reference/timeviz_barplot.qmd
|
42
|
+
- text: "`timeviz_scatterplot`"
|
43
|
+
href: reference/timeviz_scatterplot.qmd
|
44
|
+
- text: "`timeviz_screeplot`"
|
45
|
+
href: reference/timeviz_screeplot.qmd
|
46
|
+
- text: "`timeviz_screeplot`"
|
47
|
+
href: reference/timeviz_screeplot.qmd
|
48
|
+
- text: "`timeviz_vnc`"
|
49
|
+
href: reference/timeviz_vnc.qmd
|
50
|
+
- text: "`cluster_summary`"
|
51
|
+
href: reference/cluster_summary.qmd
|
52
|
+
|
53
|
+
bibliography: references.bib
|
54
|
+
|
55
|
+
format:
|
56
|
+
html:
|
57
|
+
sidebar: false
|
58
|
+
|
59
|
+
quartodoc:
|
60
|
+
title: Reference
|
61
|
+
package: google_ngrams
|
62
|
+
sections:
|
63
|
+
- title: google_ngrams fetch function
|
64
|
+
desc: "Read in Google n-gram data"
|
65
|
+
package: google_ngrams.ngrams
|
66
|
+
contents:
|
67
|
+
- google_ngram
|
68
|
+
- title: google_ngrams TimeSeries
|
69
|
+
desc: "Analyze time series data"
|
70
|
+
package: google_ngrams.TimeSeries
|
71
|
+
contents:
|
72
|
+
- timeviz_barplot
|
73
|
+
- timeviz_scatterplot
|
74
|
+
- timeviz_screeplot
|
75
|
+
- timeviz_screeplot
|
76
|
+
- timeviz_vnc
|
77
|
+
- cluster_summary
|
78
|
+
|
79
|
+
filters:
|
80
|
+
- interlinks
|
81
|
+
|
82
|
+
interlinks:
|
83
|
+
sources: {}
|