google-ngrams 0.1.0__tar.gz → 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. {google_ngrams-0.1.0/google_ngrams.egg-info → google_ngrams-0.1.1}/PKG-INFO +3 -2
  2. {google_ngrams-0.1.0 → google_ngrams-0.1.1}/README.rst +1 -1
  3. google_ngrams-0.1.1/_quarto/.gitignore +1 -0
  4. google_ngrams-0.1.1/_quarto/_extensions/machow/interlinks/.gitignore +3 -0
  5. google_ngrams-0.1.1/_quarto/_extensions/machow/interlinks/_extension.yml +7 -0
  6. google_ngrams-0.1.1/_quarto/_extensions/machow/interlinks/interlinks.lua +254 -0
  7. google_ngrams-0.1.1/_quarto/_quarto.yml +83 -0
  8. google_ngrams-0.1.1/_quarto/_site/get-started.html +801 -0
  9. google_ngrams-0.1.1/_quarto/_site/get-started_files/figure-html/cell-12-output-1.png +0 -0
  10. google_ngrams-0.1.1/_quarto/_site/get-started_files/figure-html/cell-9-output-1.png +0 -0
  11. google_ngrams-0.1.1/_quarto/_site/logo.png +0 -0
  12. google_ngrams-0.1.1/_quarto/_site/site_libs/bootstrap/bootstrap-icons.css +2078 -0
  13. google_ngrams-0.1.1/_quarto/_site/site_libs/bootstrap/bootstrap-icons.woff +0 -0
  14. google_ngrams-0.1.1/_quarto/_site/site_libs/bootstrap/bootstrap.min.css +12 -0
  15. google_ngrams-0.1.1/_quarto/_site/site_libs/bootstrap/bootstrap.min.js +7 -0
  16. google_ngrams-0.1.1/_quarto/_site/site_libs/clipboard/clipboard.min.js +7 -0
  17. google_ngrams-0.1.1/_quarto/_site/site_libs/quarto-html/anchor.min.js +9 -0
  18. google_ngrams-0.1.1/_quarto/_site/site_libs/quarto-html/popper.min.js +6 -0
  19. google_ngrams-0.1.1/_quarto/_site/site_libs/quarto-html/quarto-syntax-highlighting.css +203 -0
  20. google_ngrams-0.1.1/_quarto/_site/site_libs/quarto-html/quarto.js +899 -0
  21. google_ngrams-0.1.1/_quarto/_site/site_libs/quarto-html/tippy.css +1 -0
  22. google_ngrams-0.1.1/_quarto/_site/site_libs/quarto-html/tippy.umd.min.js +2 -0
  23. google_ngrams-0.1.1/_quarto/_site/site_libs/quarto-nav/quarto-nav.js +288 -0
  24. google_ngrams-0.1.1/_quarto/_site/site_libs/quarto-search/autocomplete.umd.js +3 -0
  25. google_ngrams-0.1.1/_quarto/_site/site_libs/quarto-search/fuse.min.js +9 -0
  26. google_ngrams-0.1.1/_quarto/_site/site_libs/quarto-search/quarto-search.js +1247 -0
  27. google_ngrams-0.1.1/_quarto/get-started.qmd +86 -0
  28. google_ngrams-0.1.1/_quarto/index.ipynb +132 -0
  29. google_ngrams-0.1.1/_quarto/index.qmd +71 -0
  30. google_ngrams-0.1.1/_quarto/logo.png +0 -0
  31. google_ngrams-0.1.1/_quarto/objects.json +1 -0
  32. google_ngrams-0.1.1/_quarto/reference/cluster_summary.qmd +13 -0
  33. google_ngrams-0.1.1/_quarto/reference/google_ngram.qmd +25 -0
  34. google_ngrams-0.1.1/_quarto/reference/index.qmd +22 -0
  35. google_ngrams-0.1.1/_quarto/reference/timeviz_barplot.qmd +33 -0
  36. google_ngrams-0.1.1/_quarto/reference/timeviz_scatterplot.qmd +32 -0
  37. google_ngrams-0.1.1/_quarto/reference/timeviz_screeplot.qmd +29 -0
  38. google_ngrams-0.1.1/_quarto/reference/timeviz_vnc.qmd +40 -0
  39. google_ngrams-0.1.1/_quarto/references.bib +22 -0
  40. {google_ngrams-0.1.0 → google_ngrams-0.1.1}/docs/google_ngrams.ipynb +22 -15
  41. {google_ngrams-0.1.0 → google_ngrams-0.1.1}/google_ngrams/vnc.py +38 -79
  42. {google_ngrams-0.1.0 → google_ngrams-0.1.1/google_ngrams.egg-info}/PKG-INFO +3 -2
  43. google_ngrams-0.1.1/google_ngrams.egg-info/SOURCES.txt +56 -0
  44. {google_ngrams-0.1.0 → google_ngrams-0.1.1}/google_ngrams.egg-info/requires.txt +1 -0
  45. {google_ngrams-0.1.0 → google_ngrams-0.1.1}/pyproject.toml +2 -1
  46. google_ngrams-0.1.0/_quarto/_quarto.yml +0 -104
  47. google_ngrams-0.1.0/google_ngrams.egg-info/SOURCES.txt +0 -20
  48. {google_ngrams-0.1.0 → google_ngrams-0.1.1}/.github/workflows/ci.yml +0 -0
  49. {google_ngrams-0.1.0 → google_ngrams-0.1.1}/.gitignore +0 -0
  50. {google_ngrams-0.1.0 → google_ngrams-0.1.1}/LICENSE +0 -0
  51. {google_ngrams-0.1.0 → google_ngrams-0.1.1}/docs/.gitkeep +0 -0
  52. {google_ngrams-0.1.0 → google_ngrams-0.1.1}/google_ngrams/__init__.py +0 -0
  53. {google_ngrams-0.1.0 → google_ngrams-0.1.1}/google_ngrams/data/__init__.py +0 -0
  54. {google_ngrams-0.1.0 → google_ngrams-0.1.1}/google_ngrams/data/googlebooks_eng_all_totalcounts_20120701.parquet +0 -0
  55. {google_ngrams-0.1.0 → google_ngrams-0.1.1}/google_ngrams/data/googlebooks_eng_gb_all_totalcounts_20120701.parquet +0 -0
  56. {google_ngrams-0.1.0 → google_ngrams-0.1.1}/google_ngrams/data/googlebooks_eng_us_all_totalcounts_20120701.parquet +0 -0
  57. {google_ngrams-0.1.0 → google_ngrams-0.1.1}/google_ngrams/ngrams.py +0 -0
  58. {google_ngrams-0.1.0 → google_ngrams-0.1.1}/google_ngrams.egg-info/dependency_links.txt +0 -0
  59. {google_ngrams-0.1.0 → google_ngrams-0.1.1}/google_ngrams.egg-info/top_level.txt +0 -0
  60. {google_ngrams-0.1.0 → google_ngrams-0.1.1}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: google_ngrams
3
- Version: 0.1.0
3
+ Version: 0.1.1
4
4
  Summary: Fetch and analyze Google Ngram data for specified word forms.
5
5
  Author-email: David Brown <dwb2@andrew.cmu.edu>
6
6
  Maintainer-email: David Brown <dwb2@andrew.cmu.edu>
@@ -20,6 +20,7 @@ Requires-Dist: importlib-resources>=6.5
20
20
  Requires-Dist: matplotlib>=3.5
21
21
  Requires-Dist: polars>=1.17
22
22
  Requires-Dist: scipy>=1.15
23
+ Requires-Dist: statsmodels>=0.14
23
24
 
24
25
 
25
26
  google_ngrams: Fetch and analyze Google Ngram data for specified word forms.
@@ -51,7 +52,7 @@ You can install the released version of google_ngrams from `PyPI <https://pypi.o
51
52
 
52
53
  .. code-block:: bash
53
54
 
54
- pip install google_ngrams
55
+ pip install google-ngrams
55
56
 
56
57
 
57
58
  Usage
@@ -28,7 +28,7 @@ You can install the released version of google_ngrams from `PyPI <https://pypi.o
28
28
 
29
29
  .. code-block:: bash
30
30
 
31
- pip install google_ngrams
31
+ pip install google-ngrams
32
32
 
33
33
 
34
34
  Usage
@@ -0,0 +1 @@
1
+ /.quarto/
@@ -0,0 +1,3 @@
1
+ *.html
2
+ *.pdf
3
+ *_files/
@@ -0,0 +1,7 @@
1
+ title: Interlinks
2
+ author: Michael Chow
3
+ version: 1.1.0
4
+ quarto-required: ">=1.2.0"
5
+ contributes:
6
+ filters:
7
+ - interlinks.lua
@@ -0,0 +1,254 @@
1
--- Parse a plain-text object inventory into a Lua table.
-- Lines beginning with "#" are treated as header lines and skipped by the
-- item parser; the project and version are taken from the "# Project:" and
-- "# Version:" headers. Every other non-empty line must have the shape
--   <name> <domain>:<role> <priority> <uri> <dispname>
-- @param filename path of the inventory text file
-- @return a table {project, version, items}, or nil when the file cannot be
--         opened; each item carries name, domain, role, priority, uri and
--         dispName exactly as matched (priority stays a string)
-- Raises an error on the first non-header line that does not match.
local function read_inv_text(filename)
    local handle = io.open(filename, "r")
    if not handle then
        return nil
    end
    local contents = handle:read("a")
    handle:close()

    -- One capture per inventory column, assembled into a single pattern.
    local entry_pattern = table.concat({
        "^",
        "(.-)%s+",      -- name
        "([%S:]-):",    -- domain
        "([%S]+)%s+",   -- role
        "(%-?%d+)%s+",  -- priority
        "(%S*)%s+",     -- uri
        "(.-)\r?$",     -- dispname
    })

    local entries = {}
    for line in contents:gmatch("[^\r\n]+") do
        -- gmatch never yields an empty line, so checking the first byte is enough
        if line:sub(1, 1) ~= "#" then
            local name, domain, role, priority, uri, dispName = line:match(entry_pattern)
            if name == nil then
                error("Error parsing line: " .. line)
            end
            entries[#entries + 1] = {
                name = name,
                domain = domain,
                role = role,
                priority = priority,
                uri = uri,
                dispName = dispName,
            }
        end
    end

    return {
        project = contents:match("# Project: (%S+)"),
        version = contents:match("# Version: (%S+)"),
        items = entries,
    }
end
49
+
50
--- Read a whole file and decode it with quarto.json.decode.
-- @param filename path of the JSON file
-- @return the decoded value, or nil when the file cannot be opened
local function read_json(filename)
    local handle = io.open(filename, "r")
    if not handle then
        return nil
    end

    local raw = handle:read("a")
    handle:close()

    -- parentheses truncate to a single return value, as the original did
    return (quarto.json.decode(raw))
end
62
+
63
--- Load an inventory from "<base_name>.txt", falling back to "<base_name>.json".
-- read_inv_text returns nil only when the text file cannot be opened, so its
-- result doubles as the existence check; the file is no longer opened once
-- just to be closed again.
-- The result is held in a local: the previous version assigned to an
-- undeclared `json`, which created/overwrote a global of that name.
-- @param base_name inventory path without extension
-- @return the inventory table, or nil when neither file can be opened
local function read_inv_text_or_json(base_name)
    local inventory_data = read_inv_text(base_name .. ".txt")
    if inventory_data == nil then
        inventory_data = read_json(base_name .. ".json")
    end
    return inventory_data
end
76
+
77
-- Inventories collected so far; each entry is a table with an `items` list.
local inventory = {}

--- Find the inventory item described by a search object.
-- An item is a candidate when:
--   * it has no inv_name, or its inv_name equals search_object.inv_name
--     (e.g. :external+<inv_name>:<domain>:<role>:`<name>`);
--   * its name equals search_object.name;
--   * search_object.role is unset, or equals the item's role;
--   * search_object.domain equals the item's domain, or, when no domain was
--     requested, the item's domain is "py".
-- @param search_object table with name and optional inv_name, role, domain
-- @return the first candidate, or nil when there is none; a warning is logged
--         for zero candidates and for more than one
local function lookup(search_object)
    local function is_candidate(item)
        if item.inv_name and item.inv_name ~= search_object.inv_name then
            return false
        end
        if item.name ~= search_object.name then
            return false
        end
        if search_object.role and item.role ~= search_object.role then
            return false
        end
        if search_object.domain then
            return item.domain == search_object.domain
        end
        -- no domain requested: only "py" items are accepted
        return item.domain == "py"
    end

    local matches = {}
    for _, inv in ipairs(inventory) do
        for _, item in ipairs(inv.items) do
            if is_candidate(item) then
                matches[#matches + 1] = item
            end
        end
    end

    local count = #matches
    if count == 0 then
        quarto.log.warning("Found no matches for object:\n", search_object)
        return nil
    end
    if count > 1 then
        quarto.log.warning("Found multiple matches for " .. search_object.name .. ", using the first match.")
    end
    return matches[1]
end
124
+
125
--- Split a string into its non-empty pieces.
-- `sep` is spliced into a character class, so it acts as a set of separator
-- characters rather than a literal multi-character delimiter. Runs of
-- separators never produce empty pieces.
-- @param inputstr string to split
-- @param sep separator characters; whitespace ("%s") when nil
-- @return array of pieces in order of appearance
local function mysplit (inputstr, sep)
    local delimiter = sep
    if delimiter == nil then
        delimiter = "%s"
    end

    local pattern = "([^" .. delimiter .. "]+)"
    local pieces = {}
    for piece in string.gmatch(inputstr, pattern) do
        pieces[#pieces + 1] = piece
    end
    return pieces
end
135
+
136
--- Expand role shorthands to the spelling compared against inventory items.
-- Only "func" is rewritten (to "function"); any other value, including nil,
-- is returned unchanged.
local function normalize_role(role)
    local aliases = { func = "function" }
    return aliases[role] or role
end
142
+
143
--- Turn an interlink target into a search object for `lookup`.
-- The object name is the text between two "%60" markers (a URL-encoded
-- backtick). Accepted targets:
--   `name`                              -> name only
--   :role:`name`                        -> role, name
--   :domain:role:`name`                 -> domain, role, name
--   :external+inv:domain:role:`name`    -> inv_name, domain, role, name
-- A leading "~" on the name sets `shortened` and is stripped from the name.
-- @param str the link target
-- @return the search table, or {} (after logging a warning) when the target
--         cannot be parsed
local function build_search_object(str)
    local function quoted_name(text)
        return text:match("%%60(.*)%%60")
    end

    local search = {}

    if str:sub(1, 1) ~= ":" then
        search.name = quoted_name(str)
    else
        local parts = mysplit(str, ":")
        local count = #parts
        if count == 2 then
            -- e.g. :func:`my_func`
            search.role = normalize_role(parts[1])
            search.name = quoted_name(parts[2])
        elseif count == 3 then
            -- e.g. :py:func:`my_func`
            search.domain = parts[1]
            search.role = normalize_role(parts[2])
            search.name = quoted_name(parts[3])
        elseif count == 4 then
            -- e.g. :external+inv:py:func:`my_func`
            search.external = true
            search.inv_name = parts[1]:match("external%+(.*)")
            search.domain = parts[2]
            search.role = normalize_role(parts[3])
            search.name = quoted_name(parts[4])
        else
            quarto.log.warning("couldn't parse this link: " .. str)
            return {}
        end
    end

    if search.name == nil then
        quarto.log.warning("couldn't parse this link: " .. str)
        return {}
    end

    if search.name:sub(1, 1) == "~" then
        search.shortened = true
        search.name = search.name:sub(2)
    end

    return search
end
184
+
185
--- Replacement element for a link whose target could not be resolved.
-- Returns the link's text as inline code; `search_object` and `replacement`
-- are accepted but not used.
-- TODO: how to unescape html elements like [?
local function report_broken_link(link, search_object, replacement)
    local link_text = pandoc.utils.stringify(link.content)
    return pandoc.Code(link_text)
end
189
+
190
--- Pandoc Link filter: resolve interlink targets against the loaded inventories.
-- Links whose target contains no "%60" (URL-encoded backtick) are returned
-- untouched. For interlinks:
--   * if the link has no text, the text becomes the object name as inline
--     code (only the last dotted component when the name was "~"-shortened);
--   * if no inventory item matches, the link is replaced by
--     report_broken_link's result;
--   * otherwise the target becomes the item's uri, with a trailing "$"
--     replaced by the object name.
-- @param link pandoc Link element
-- @return the (possibly modified) link, or the broken-link replacement
function Link(link)
    -- do not process regular links ----
    if not link.target:match("%%60") then
        return link
    end

    -- lookup item ----
    local search = build_search_object(link.target)
    local item = lookup(search)

    -- determine replacement, used if no link text specified ----
    local original_text = pandoc.utils.stringify(link.content)
    local replacement = search.name
    if search.shortened then
        local t = mysplit(search.name, ".")
        replacement = t[#t]
    end

    -- set link text ----
    if original_text == "" and replacement ~= nil then
        link.content = pandoc.Code(replacement)
    end

    -- report broken links ----
    if item == nil then
        return report_broken_link(link, search)
    end

    -- Substitute through a function so the name is inserted literally: in a
    -- replacement *string*, "%" is an escape character, and a name carrying a
    -- percent-encoded character (e.g. "%20") would raise "invalid capture
    -- index" or be mangled. Parentheses drop gsub's second return value.
    link.target = (item.uri:gsub("%$$", function()
        return search.name
    end))

    return link
end
222
+
223
--- Register an inventory after prefixing every item's uri.
-- Each item in json.items is modified in place (uri = prefix .. uri), then
-- the inventory table is appended to the module-level `inventory` list.
-- @param json inventory table with an `items` list
-- @param prefix string prepended to each uri
local function fixup_json(json, prefix)
    local entries = json.items
    for _, entry in ipairs(entries) do
        entry.uri = prefix .. entry.uri
    end
    inventory[#inventory + 1] = json
end
229
+
230
--- Meta handler: load every inventory before any link is processed.
-- For each key under meta.interlinks.sources, reads
-- "<project offset>/_inv/<key>_objects" (.txt, else .json) and registers it
-- with the source's url as uri prefix. The project's own
-- "<project offset>/objects" inventory is then registered with prefix "/".
-- Inventories that cannot be read are skipped.
local function load_inventories(meta)
    local interlinks = meta.interlinks
    if interlinks and interlinks.sources then
        for source_name, source in pairs(interlinks.sources) do
            local external = read_inv_text_or_json(
                quarto.project.offset .. "/_inv/" .. source_name .. "_objects"
            )
            -- evaluated even when the inventory is missing, as before
            local url_prefix = pandoc.utils.stringify(source.url)
            if external ~= nil then
                fixup_json(external, url_prefix)
            end
        end
    end

    local own = read_inv_text_or_json(quarto.project.offset .. "/objects")
    if own ~= nil then
        fixup_json(own, "/")
    end
end

-- Two filter tables so the Meta step is a separate pass listed before Link.
return {
    { Meta = load_inventories },
    { Link = Link },
}
@@ -0,0 +1,83 @@
1
# Quarto website project for the google_ngrams documentation.
project:
  type: website
  output-dir: _site

website:
  title: "google_ngrams"
  description: "Fetch and analyze Google Ngram data for specified word forms."
  page-navigation: true
  # NOTE(review): no favicon.ico appears among the packaged _quarto files - confirm it exists.
  favicon: "favicon.ico"
  navbar:
    background: light
    pinned: true
    logo: logo.png
    left:
      - text: "Get started"
        file: get-started.qmd
      # NOTE(review): no time-series.qmd appears among the packaged _quarto files - confirm it exists.
      - text: "TimeSeries"
        file: time-series.qmd
      - text: "Reference"
        file: reference/index.qmd
      - text: Learn more
        menu:
          # NOTE(review): the label says VNC clustering but the link targets the pseudobibeR package - confirm the URL.
          - text: "VNC Clustering for R"
            href: https://cran.r-project.org/web/packages/pseudobibeR/index.html
            target: _blank
    right:
      - icon: github
        href: https://github.com/browndw/google_ngrams
        aria-label: google_ngrams on GitHub
  sidebar:
    style: "floating"
    collapse-level: 1
    contents:
      - section: Fetching Data
        contents:
          - text: "`google_ngram`"
            href: reference/google_ngram.qmd
      # Each reference page is listed once (timeviz_screeplot was previously duplicated).
      - section: Time Series
        contents:
          - text: "`timeviz_barplot`"
            href: reference/timeviz_barplot.qmd
          - text: "`timeviz_scatterplot`"
            href: reference/timeviz_scatterplot.qmd
          - text: "`timeviz_screeplot`"
            href: reference/timeviz_screeplot.qmd
          - text: "`timeviz_vnc`"
            href: reference/timeviz_vnc.qmd
          - text: "`cluster_summary`"
            href: reference/cluster_summary.qmd

bibliography: references.bib

format:
  html:
    sidebar: false

# API reference generation settings.
quartodoc:
  title: Reference
  package: google_ngrams
  sections:
    - title: google_ngrams fetch function
      desc: "Read in Google n-gram data"
      package: google_ngrams.ngrams
      contents:
        - google_ngram
    - title: google_ngrams TimeSeries
      desc: "Analyze time series data"
      # NOTE(review): the package ships google_ngrams/vnc.py and ngrams.py - confirm google_ngrams.TimeSeries resolves.
      package: google_ngrams.TimeSeries
      contents:
        - timeviz_barplot
        - timeviz_scatterplot
        - timeviz_screeplot
        - timeviz_vnc
        - cluster_summary

filters:
  - interlinks

# No external inventories are configured.
interlinks:
  sources: {}