philologic 5.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. philologic/Config.py +947 -0
  2. philologic/TagCensus.py +143 -0
  3. philologic/__init__.py +4 -0
  4. philologic/loadtime/LoadFilters.py +420 -0
  5. philologic/loadtime/LoadOptions.py +247 -0
  6. philologic/loadtime/Loader.py +1317 -0
  7. philologic/loadtime/OHCOVector.py +381 -0
  8. philologic/loadtime/Parser.py +1536 -0
  9. philologic/loadtime/PlainTextParser.py +207 -0
  10. philologic/loadtime/PostFilters.py +324 -0
  11. philologic/loadtime/__init__.py +1 -0
  12. philologic/loadtime/__main__.py +40 -0
  13. philologic/runtime/DB.py +246 -0
  14. philologic/runtime/FragmentParser.py +129 -0
  15. philologic/runtime/HitList.py +313 -0
  16. philologic/runtime/HitWrapper.py +225 -0
  17. philologic/runtime/MetadataQuery.py +339 -0
  18. philologic/runtime/ObjectFormatter.py +849 -0
  19. philologic/runtime/Query.py +787 -0
  20. philologic/runtime/QuerySyntax.py +125 -0
  21. philologic/runtime/WSGIHandler.py +223 -0
  22. philologic/runtime/__init__.py +27 -0
  23. philologic/runtime/access_control.py +509 -0
  24. philologic/runtime/citations.py +146 -0
  25. philologic/runtime/find_similar_words.py +61 -0
  26. philologic/runtime/get_text.py +104 -0
  27. philologic/runtime/link.py +76 -0
  28. philologic/runtime/pages.py +18 -0
  29. philologic/runtime/reports/__init__.py +13 -0
  30. philologic/runtime/reports/aggregation.py +195 -0
  31. philologic/runtime/reports/bibliography.py +79 -0
  32. philologic/runtime/reports/collocation.py +321 -0
  33. philologic/runtime/reports/concordance.py +57 -0
  34. philologic/runtime/reports/frequency.py +210 -0
  35. philologic/runtime/reports/generate_word_frequency.py +86 -0
  36. philologic/runtime/reports/kwic.py +91 -0
  37. philologic/runtime/reports/landing_page.py +197 -0
  38. philologic/runtime/reports/navigation.py +76 -0
  39. philologic/runtime/reports/table_of_contents.py +88 -0
  40. philologic/runtime/reports/time_series.py +128 -0
  41. philologic/runtime/web_config.py +32 -0
  42. philologic/shlax.py +196 -0
  43. philologic/shlaxtree.py +245 -0
  44. philologic/utils/__init__.py +6 -0
  45. philologic/utils/convert_entities.py +33 -0
  46. philologic/utils/line_count.py +16 -0
  47. philologic/utils/load_module.py +14 -0
  48. philologic/utils/metadata_type_handler.py +62 -0
  49. philologic/utils/pretty_print.py +27 -0
  50. philologic/utils/sort.py +26 -0
  51. philologic-5.1.0.dist-info/METADATA +30 -0
  52. philologic-5.1.0.dist-info/RECORD +55 -0
  53. philologic-5.1.0.dist-info/WHEEL +5 -0
  54. philologic-5.1.0.dist-info/licenses/LICENSE +674 -0
  55. philologic-5.1.0.dist-info/top_level.txt +1 -0
philologic/Config.py ADDED
@@ -0,0 +1,947 @@
1
+ #!/var/lib/philologic5/philologic_env/bin/python3
2
+ """Configuration module"""
3
+
4
+ import os
5
+ import sys
6
+
7
+ import orjson as json
8
+ from philologic.utils import pretty_print
9
+
10
# Reusable citation building blocks, keyed by a short name.
# Every regular entry carries the keys field, object_level, prefix, suffix,
# link and style, in that order.  Rows below are
# (name, field, object_level, suffix, link, style); prefix is always "".
CITATIONS = {
    name: {
        "field": field,
        "object_level": object_level,
        "prefix": "",
        "suffix": suffix,
        "link": link,
        "style": style,
    }
    for name, field, object_level, suffix, link, style in (
        ("author", "author", "doc", "", True, {"font-variant": "small-caps"}),
        (
            "title",
            "title",
            "doc",
            "",
            True,
            {"font-variant": "small-caps", "font-style": "italic", "font-weight": 700},
        ),
        ("year", "year", "doc", "", False, {}),
        ("pub_place", "pub_place", "doc", "", False, {}),
        ("publisher", "publisher", "doc", "", False, {}),
        ("collection", "collection", "doc", ", ", False, {}),
        ("div1_head", "head", "div1", "", True, {"font-variant": "small-caps"}),
        ("div2_head", "head", "div2", "", True, {"font-variant": "small-caps"}),
        ("div3_head", "head", "div3", "", True, {"font-variant": "small-caps"}),
        ("div1_date", "div_date", "div1", "", False, {}),
        ("div2_date", "div_date", "div2", "", False, {}),
        ("div3_date", "div_date", "div3", "", False, {}),
        ("speaker", "speaker", "para", "", True, {"font-variant": "small-caps"}),
        ("resp", "resp", "para", "", True, {"font-variant": "small-caps"}),
    )
}
# The page entry is the only one with a non-empty prefix; it is written out
# literally because its keys are declared in a different order than the rest.
CITATIONS["page"] = {
    "style": {},
    "suffix": "",
    "object_level": "page",
    "field": "n",
    "prefix": "page ",
    "link": True,
}
132
+
133
# Defaults for the db.locals options.  Each option maps to a dict holding the
# default "value" and the "comment" text associated with that option.
DB_LOCALS_DEFAULTS = dict(
    metadata_fields={"value": [], "comment": ""},
    metadata_hierarchy={"value": [[]], "comment": ""},
    metadata_types={"value": {}, "comment": ""},
    ascii_conversion={
        "value": True,
        "comment": (
            "# This defines whether text and metadata were converted to an ASCII representation\n"
            "# Don't change this value post-database load."
        ),
    },
    normalized_fields={"value": [], "comment": ""},
    default_object_level={
        "value": "doc",
        "comment": "# This defines the default navigation element in your database",
    },
    lowercase_index={
        "value": True,
        "comment": "# This defines whether all terms in the index have been lowercased. If so, input searches will be lowercased",
    },
    debug={
        "value": False,
        "comment": "# If set to True, this enabled debugging messages to be printed out to the Apache error log",
    },
    secret={
        "value": "",
        "comment": "# The secret value is a random string to be used to generate a secure cookie for access control. The string value can be anything.",
    },
    overflow_words={
        "value": set(),
        "comment": "# The overflow_words variable is a set of words which are not indexed in the database, but stored as blobs in the data/overflow_words directory.",
    },
)
168
# Banner text for db.locals.  The string starts with a newline, separates the
# banner lines with a blank line, and ends with four newlines.
DB_LOCALS_HEADER = (
    "\n"
    + "\n\n".join(
        (
            "#" * 57,
            "#### Database configuration options for PhiloLogic5 #####",
            "#" * 57,
            "#### All variables must be in valid Python syntax #######",
            "#" * 57,
            "#### Edit with extra care: an invalid ##########",
            "#### configuration could break your database. ##########",
            "#" * 57,
        )
    )
    + "\n\n\n\n"
)
178
+
179
# Defaults for the web configuration options.  Each option maps to a dict
# holding the default "value" and the "comment" text that Config.__str__ writes
# out for that option.  Multi-line comments are joined with "\n"; every element
# of a join list must be followed by a comma, otherwise adjacent string
# literals are concatenated and two comment lines end up fused into one.
WEB_CONFIG_DEFAULTS = {
    "dbname": {
        "value": "noname",
        "comment": "# The dbname variable is the title name in the HTML header",
    },
    "access_control": {
        "value": False,
        "comment": "\n".join(
            [
                "# Configure access control with True or False.",
                "# Note that if you want access control, you have to provide a logins.txt file inside your /data directory,",
                "# otherwise access will remain open.",
            ]
        ),
    },
    "access_file": {
        "value": "",
        "comment": "# Location of access file which contains authorized/unauthorized IPs and domain names",
    },
    "link_to_home_page": {
        "value": "",
        "comment": '# If set, link_to_home_page should be a string starting with "http://" pointing to a separate home page for the database',
    },
    "search_reports": {
        "value": ["concordance", "kwic", "aggregation", "collocation", "time_series"],
        "comment": "\n".join(
            [
                "# The search_reports variable sets which search report is viewable in the search form",
                "# Available reports are concordance, kwic, aggregation, collocation, and time_series",
            ]
        ),
    },
    "metadata": {
        "value": [],
        "comment": "# The metadata variable sets which metadata field is viewable in the search form",
    },
    "metadata_aliases": {
        "value": {},
        "comment": "\n".join(
            [
                "# The metadata_aliases variable allows to display a metadata variable under a different name in the HTML",
                "# For example, you could rename the who metadata to Speaker, and the create_date variable to Date like so:",
                "# metadata_aliases = {'who': 'Speaker', 'create_date': 'Date'}",
            ]
        ),
    },
    "metadata_input_style": {
        "value": {},
        "comment": "\n".join(
            [
                "# The metadata_input_style variable defines whether to use a text input field, a dropdown menu or checkboxes for any given",
                "# metadata field. All fields are set by default to text. Note that dropdowns only allow you to select one value, whereas checkboxes allow you to select more than one.",
                '# If using a dropdown menu, you need to set it to "dropdown" and populate the metadata_choice_values variable. If using checkboxes, set to "checkboxes", and populate',
                "# the metadata_choice_values variable",
            ]
        ),
    },
    "metadata_choice_values": {
        "value": {},
        "comment": "\n".join(
            [
                "# The metadata_choice_values variable defines what values to display in the metadata dropdown. It defaults to an empty dict.",
                "# If no value is provided for a metadata field which has an input type of dropdown, no value will be displayed. You should",
                "# provide a list of strings with labels and values for metadata.",
                """# ex: {"title": [{"label": "Contrat Social", "value": "Du Contrat Social"}, {"label": "Emile", "value": "Emile, ou de l'éducation"}]}""",
            ]
        ),
    },
    "word_property_aliases": {
        "value": {},
        "comment": "\n".join(
            [
                "# The word_property_aliases variable allows to display a word property variable under a different name in the HTML",
                "# For example, you could rename the lemma metadata to Lemma, and the pos variable to Part of Speech like so:",
                "# word_property_aliases = {'lemma': 'Lemma', 'pos': 'Part of Speech'}",
            ]
        ),
    },
    "autocomplete": {
        "value": ["q"],
        "comment": "# The autocomplete variable determines which fields have autocomplete enabled. Note that the value 'q' is for term autocomplete",
    },
    "facets": {
        "value": [],
        "comment": "\n".join(
            [
                "# The facets variable sets which metadata field can be used as a facet",
                "# The object format is a list of metadata like the following: ['author', 'title', 'year']",
                "# The dict key should describe what the facets will do, and the dict value, which has to be a list,",
                "# should list the metadata to be used for the frequency counts",
            ]
        ),
    },
    "words_facets": {
        "value": [],
        "comment": "\n".join(
            [
                "# The words_facets variable functions much like the facets variable, but describes metadata attached to words,",
                "# such as lemma or part of speech. The object format is a list of metadata like the following: ['lemma', 'pos']",
            ]
        ),
    },
    "skip_table_of_contents": {
        "value": False,
        "comment": "# If set to True, disable display of table of contents and go straight to the text. Useful when texts have no internal structure.",
    },
    "concordance_length": {
        "value": 300,
        "comment": "# The concordance_length variable sets the length in bytes of each concordance result",
    },
    "search_examples": {
        "value": {},
        "comment": "\n".join(
            [
                "# The search_examples variable defines which examples should be provided for each searchable field in the search form.",
                "# If None is the value, or there are any missing examples, defaults will be generated at runtime by picking the first",
                "# result for any given field. If you wish to change these default values, you should configure them here like so:",
                '# search_examples = {"author": "Jean-Jacques Rousseau", "title": "Du contrat social"}',
            ]
        ),
    },
    "word_attributes": {
        "value": {},
        "comment": "\n".join(
            [
                "# The word_attributes variable defines word attributes filters for use in the collocation report to allow searching",
                "# only on a combination of one or more word attribute. Shows up in the search form under the collocation report only.",
            ]
        ),
    },
    "results_summary": {
        "value": [
            {
                "field": "author",
                "object_level": "doc",
            },
            {
                "field": "title",
                "object_level": "doc",
            },
        ],
        "comment": "# The results_summary variable determines which fields get stats displayed at the top of concordance/KWIC results.",
    },
    "respect_text_line_breaks": {
        "value": False,
        "comment": "\n".join(
            [
                "# The respect_text_line_breaks variable determines if line breaks in the source file should be displayed as line breaks in the browser",
                "# This is really useful for displaying plain text files since they have no XML or DOM structure. Should otherwise be False for TEI/XML",
            ]
        ),
    },
    "stopwords": {
        "value": "",
        "comment": "\n".join(
            [
                "# The stopwords variable defines a file path containing a list of words (one word per line) used for filtering out words",
                "# in the collocation report. If only a filename is provided, it will look for the file in the /data directory.",
                "# The file must be located in the defined path. If the file is not found,",
                "# no option for using a stopword list will be displayed in collocation searches.",
            ]
        ),
    },
    "citations": {
        "value": CITATIONS,
        "comment": "\n".join(
            [
                "# Define how individual metadata is displayed. The citations variable is reused by default for citations in individual reports.",
                "# You can define styling with a dictionary of valid CSS property/value such as those in the default values.",
                "# prefix and suffix keywords define what precedes and follows each field. You can use HTML for these strings.",
                "# The link key enables linking for that metadata field. It links to the table of contents for title and filename,",
                "# and to a regular query for all other metadata fields.",
            ]
        ),
    },
    "aggregation_config": {
        "value": [
            {
                "field": "author",
                "object_level": "doc",
                "break_up_field": "title",
                "field_citation": [CITATIONS["author"]],
                "break_up_field_citation": [
                    CITATIONS["title"],
                    CITATIONS["year"],
                    CITATIONS["pub_place"],
                    CITATIONS["publisher"],
                    CITATIONS["collection"],
                ],
            },
            {
                "field": "title",
                "object_level": "doc",
                "field_citation": [
                    CITATIONS["title"],
                    CITATIONS["year"],
                    CITATIONS["pub_place"],
                    CITATIONS["publisher"],
                    CITATIONS["collection"],
                ],
                "break_up_field": None,
                "break_up_field_citation": None,
            },
        ],
        "comment": "\n".join(
            [
                "# The aggregation_config variable drives the aggregation report: which fields can be used to group concordances,",
                "# and whether you can further break down these counts by a particular metadata field.",
            ]
        ),
    },
    "dictionary": {
        "value": False,
        "comment": "# The dictionary variable enables a different search interface with the headword as a starting point. It is turned off by default",
    },
    "dictionary_bibliography": {
        "value": False,
        "comment": "\n".join(
            [
                "# The dictionary_bibliography variable enables a different bibliography report where entries are displayed",
                "# in their entirety and grouped under the same title. It is turned off by default",
            ]
        ),
    },
    "dictionary_selection": {
        "value": False,
        "comment": "\n".join(
            [
                "# If set to True, this option creates a dropdown menu to select searching within only a single volume or title.",
                "# This replaces the title field in the search form.",
                "# You need to configure the dictionary_selection_options variable below to define your options.",
            ]
        ),
    },
    "dictionary_selection_options": {
        "value": [],
        "comment": "\n".join(
            [
                "# If dictionary_selection is set to True, you need to populate this variable as in the following:",
                """# [{"label": "DAF 1932", "title": "Dictionnaire de l'Académie Française 1932"}]""",
                "# Each volume is represented as an object containing the label which is displayed in the search form",
                "# and a title value which should either be the exact string stored in the SQL table, or a ripgrep expression",
                '# such as "Dictionnaire de Littre.*" if you wish to match more than one title.',
            ]
        ),
    },
    "landing_page_browsing": {
        "value": "default",
        "comment": "\n".join(
            [
                "# The landing_page_browsing variable defines what type of landing page. There are 4 built-in reports available: 'default',",
                "# 'dictionary', 'simple', or 'toc'. You can otherwise supply a relative path to a custom HTML template. Note that this path is relative",
                "# to the database root. The only constraint for custom templates is that the HTML must be encapsulated inside a div",
            ]
        ),
    },
    "default_landing_page_browsing": {
        "value": [
            {
                "label": "Author",
                "group_by_field": "author",
                "display_count": True,
                "queries": ["A-D", "E-I", "J-M", "N-R", "S-Z"],
                "is_range": True,
                "citation": [CITATIONS["author"]],
            },
            {
                "label": "Title",
                "group_by_field": "title",
                "display_count": False,
                "queries": ["A-D", "E-I", "J-M", "N-R", "S-Z"],
                "is_range": True,
                "citation": [CITATIONS["author"], CITATIONS["title"], CITATIONS["year"]],
            },
        ],
        "comment": "\n".join(
            [
                "# The default_landing_page_browsing variable allows for configuration of navigation by metadata within the landing page.",
                "# You can choose any document-level metadata (such as author, title, date, genre...) for browsing",
                '# and define two different types of queries to group your data: ranges and exact matches, i.e. "A-D" or "Comedy".',
                "# You can define styling with a dictionary of valid CSS property/value such as those in the default values.",
                "# begin and end keywords define what precedes and follows each field. You can use HTML for these strings.",
            ]
        ),
    },
    "default_landing_page_display": {
        "value": {},
        "comment": "\n".join(
            [
                "# The default landing page display variable allows you to load content by default. It is configured",
                "# in the same way as default_landing_page_browsing objects except that you need to define just one",
                "# range (the one you wish to display) as a string, such as 'A-D'. An empty dict will disable the feature.",
            ]
        ),
    },
    "simple_landing_citation": {
        "value": [
            CITATIONS["author"],
            CITATIONS["title"],
            CITATIONS["year"],
            CITATIONS["pub_place"],
            CITATIONS["publisher"],
            CITATIONS["collection"],
        ],
        "comment": "# This variable defines the citation for the simple landing page.",
    },
    "dico_letter_range": {
        # One single-character string per letter, A through Z.
        "value": list("ABCDEFGHIJKLMNOPQRSTUVWXYZ"),
        "comment": "\n".join(
            [
                "# If landing_page_browsing is set to dictionary, the dico_letter_range variable allows you to define set of letters corresponding to the first letter of a headword. This generates a set of buttons",
                "# for browsing the database available on the landing page. The default represents the entire roman alphabet. An empty list hides the table.",
            ]
        ),
    },
    "concordance_citation": {
        "value": [
            CITATIONS["author"],
            CITATIONS["title"],
            CITATIONS["year"],
            CITATIONS["div1_head"],
            CITATIONS["div2_head"],
            CITATIONS["div3_head"],
            CITATIONS["speaker"],
            CITATIONS["resp"],
            CITATIONS["page"],
        ],
        "comment": "\n".join(
            [
                "# The concordance_citation variable define how and in what field order citations are displayed in concordance reports.",
                "# You can define styling with a dictionary of valid CSS property/value such as those in the default values.",
                "# See comments for the citations variable for how to configure citations",
            ]
        ),
    },
    "bibliography_citation": {
        "value": [
            CITATIONS["author"],
            CITATIONS["title"],
            CITATIONS["year"],
            CITATIONS["div1_head"],
            CITATIONS["div2_head"],
            CITATIONS["div3_head"],
            CITATIONS["speaker"],
            CITATIONS["resp"],
            CITATIONS["page"],
        ],
        "comment": "\n".join(
            [
                "# The bibliography_citation variable define how and in what field order citations are displayed in bibliography reports.",
                "# You can define styling with a dictionary of valid CSS property/value such as those in the default values.",
                "# See comments for the citations variable for how to configure citations",
            ]
        ),
    },
    "table_of_contents_citation": {
        "value": [],
        "comment": "\n".join(
            [
                "# The table_of_contents_citation variable define how and in what field order citations are displayed within the table of content",
                "# In most cases, this should remain empty, except in the cases of div elements with different metadata values",
            ]
        ),
    },
    "navigation_citation": {
        "value": [
            CITATIONS["author"],
            CITATIONS["title"],
            CITATIONS["year"],
            CITATIONS["pub_place"],
            CITATIONS["publisher"],
            CITATIONS["collection"],
        ],
        "comment": "\n".join(
            [
                "# The navigation_citation variable define how and in what field order citations are displayed in navigation reports.",
                "# You can define styling with a dictionary of valid CSS property/value such as those in the default values.",
                "# See comments for the citations variable for how to configure citations",
            ]
        ),
    },
    "kwic_bibliography_fields": {
        "value": [],
        "comment": "\n".join(
            [
                "# The kwic_bibliography_fields variable defines which bibliography fields will be displayed in the KWIC view. It should be",
                "# modified with extra care in conjunction with the concordance_citation function located in reports/concordance.py.",
                "# If the variable is an empty list, filename will be used.",
                "",
            ]
        ),
    },
    "concordance_biblio_sorting": {
        "value": [],
        "comment": "\n".join(
            [
                "# The concordance_biblio_sorting variable allows you to pick which metadata field can be used for sorting concordance or bibliography results.",
                "# It is a list of tuples where multiple metadata fields can be used for sorting, such as [('author', 'title'), ('year', 'author', 'title')].",
                '# Note that these fields must belong to the same object type, such as "doc" or "div".',
                "",
            ]
        ),
    },
    "kwic_metadata_sorting_fields": {
        "value": [],
        "comment": "\n".join(
            [
                "# The kwic_metadata_sorting_fields variable allows you to pick which metadata field can be used for sorting KWIC results.",
                "",
            ]
        ),
    },
    "collocation_fields_to_compare": {
        "value": ["author", "title"],
        "comment": "\n".join(
            [
                "# The collocation_fields_to_compare variable defines which metadata fields are used to compare collocation results.",
                "# It is a list of metadata fields, such as ['author', 'title'].",
                "",
            ]
        ),
    },
    "time_series_year_field": {
        "value": "year",
        "comment": "\n".join(
            [
                "# The time_series_year_field variable defines which metadata field to use for time series.",
                "",
            ]
        ),
    },
    "time_series_interval": {
        "value": 10,
        "comment": "# The time_series_interval variable defines the default year span used for time series.",
    },
    "time_series_start_end_date": {
        "value": {"start_date": 0, "end_date": 0},
        "comment": "# The time_series_start_end_date variable defines the default start and end dates for time series when no dates are provided.",
    },
    "external_page_images": {
        "value": False,
        "comment": "# This defines whether the page images should be viewed in a non-PhiloLogic instance",
    },
    "page_images_url_root": {
        "value": "",
        "comment": "\n".join(
            [
                "# The page_images_url_root variable defines a root URL where pages images can be fetched when a filename is provided inside a page tag.",
                "# Note that the page image filename must be inside a fac or id attribute such as:",
                '# <pb fac="filename.jpg"> or <pb id="filename.jpg">',
                "# So a URL of http://my-server.com/images/ will resolve to http://my-server.com/images/filename.jpg.",
                "",
            ]
        ),
    },
    "page_image_extension": {
        "value": "",
        "comment": "\n".join(
            [
                "# The page_image_extension value is useful when the image name does not have an extension defined in the markup.",
                '# For instance, given <pb n="1" fac="image1">, you could define the extension as ".jpeg" and the browser would fetch',
                "# the image at http://some-url/image1.jpeg (where some-url is defined in the above page_images_url_root variable).",
                "",
            ]
        ),
    },
    "logo": {
        "value": "",
        "comment": "\n".join(
            [
                "# The logo variable defines the location of an image to display on the landing page, just below the",
                "# search form. It can be a relative URL, or an absolute link, in which case you want to make sure",
                "# it starts with http://. If no logo is defined, no picture will be displayed.",
                "",
            ]
        ),
    },
    "header_in_toc": {
        "value": False,
        "comment": "# The header_in_toc variable defines whether to display a button to show the header in the table of contents",
    },
    "search_syntax_template": {
        "value": "default",
        "comment": "\n".join(
            [
                "# You can define a custom HTML template for the search syntax pop-up window, in which case you need to supply the",
                "# relative path to the template. Note that this path is relative to the database root. The only constraint",
                "# for custom templates is that the HTML must be encapsulated inside a div",
                "",
            ]
        ),
    },
    "concordance_formatting_regex": {
        "value": [],
        "comment": "\n".join(
            [
                "# A list of pattern with replacement to be run on individual concordances before sending to browser",
                "# It is constructed as a list of tuples where the first element is the pattern to be matched",
                "# and the second element is the replacement",
                '# e.g.: [("<note>", "<span>"), ("</note>", "</span>")]',
                "",
            ]
        ),
    },
    "kwic_formatting_regex": {
        "value": [],
        "comment": "\n".join(
            [
                "# A list of pattern with replacement to be run on individual kwic concordances before sending to browser",
                "# It is constructed as a list of tuples where the first element is the pattern to be matched",
                "# and the second element is the replacement",
                '# e.g.: [("<note>", "<span>"), ("</note>", "</span>")]',
                "",
            ]
        ),
    },
    "navigation_formatting_regex": {
        "value": [],
        "comment": "\n".join(
            [
                "# A list of pattern with replacement to be run on text objects before sending to browser",
                "# It is constructed as a list of tuples where the first element is the pattern to be matched",
                "# and the second element is the replacement",
                '# e.g.: [("<note>", "<span>"), ("</note>", "</span>")]',
                "",
            ]
        ),
    },
    "dictionary_lookup": {
        "value": {"url_root": "", "keywords": False},
        "comment": "\n".join(
            [
                "# Dictionary lookup function. You select a word in running text and hit D, and it'll query an external dictionary and return",
                "# definitions. You need to provide the URL root of the dictionary. If keywords is false, the word selected is just appended to",
                "# the URL. Otherwise, if set to True, you need to configure the dictionary_lookup_keywords variable below to construct the full URL.",
                "",
            ]
        ),
    },
    "dictionary_lookup_keywords": {
        "value": {
            "immutable_key_values": {},
            "variable_key_values": {},
            "selected_keyword": "",
        },
        "comment": "\n".join(
            [
                "# This defines what keyword/values are appended to the root URL for dico lookup. The immutable_key_values defines key/values which are hardcoded",
                "# The variable_key_values defines a key/value pair where the key is the URL key, and the value is a corresponding metadata field value from the text",
                "# currently displayed. The selected_keyword corresponds to the URL key for the word selected in the text.",
            ]
        ),
    },
    "query_parser_regex": {
        # NOTE(review): as rendered here, ("|", "|"), ("-", "-") and ("*", "*")
        # replace a character with itself, and "*" alone is not a valid regex
        # pattern.  Presumably the real file holds look-alike (e.g. full-width)
        # characters on the pattern side -- TODO confirm against the repository.
        "value": [
            ("-", " "),
            (" OR ", " | "),
            ("'", " "),
            (";", ""),
            (",", ""),
            ("!", ""),
            ("\u3000", " "),
            ("|", "|"),
            ("”", '"'),
            ("-", "-"),
            ("*", "*"),
        ],
        "comment": "\n".join(
            [
                "# A list of pattern with replacement to be run on all incoming queries",
                "# It is constructed as a list of tuples where the first element is the regex pattern to be matched",
                "# and the second element is the replacement",
                '# e.g.: [(" OR ", " | "), ("-", " ")]',
                "",
            ]
        ),
    },
    "report_error_link": {
        "value": "",
        "comment": "# The link should start with http:// or https://. This will display an error report link in the header and in document navigation",
    },
    "academic_citation": {
        "value": {"collection": "", "citation": [], "custom_url": ""},
        "comment": "\n".join(
            [
                "# The academic citation used to cite this database. The citation is built with the citation defined (from metadata values) + the collection (which can be HTML)",
                """# e.g.: {"collection": 'ARTFL-FRANTEXT, University of Chicago', "citation": [citations["author"], citations["title"], citations["year"]]}""",
                "# You can define a custom URL (not the URL of the database itself).",
            ]
        ),
    },
}
816
+
817
# Banner written at the top of a generated web_config file. Config.__str__
# strips each line and drops the empty ones, so only the five banner lines
# end up in the output; the blank lines between them are kept here so the
# constant's value is unchanged.
WEB_CONFIG_HEADER = (
    "\n"
    "####################################################\n\n"
    "#### Web configuration options for PhiloLogic5 #####\n\n"
    "####################################################\n\n"
    "### All variables must be in valid Python syntax ###\n\n"
    "####################################################\n\n\n\n"
)
824
+
825
+
826
class Config:
    """Main Config class to build out web_config and db.locals.

    A Config starts from a ``defaults`` mapping of the form
    ``{key: {"value": ..., "comment": ...}}`` and, when ``filename`` exists,
    executes that file as Python so that the names it assigns override the
    default values. Values are readable both as items (``config["key"]``)
    and as attributes (``config.key``).
    """

    def __init__(self, filename, defaults, header=""):
        """Load defaults, then overlay the values assigned in ``filename``.

        Args:
            filename: path of the config file; skipped when empty or missing.
            defaults: mapping of key -> {"value": ..., "comment": ...}.
            header: banner text emitted at the top of ``str(self)``.
        """
        self.filename = filename
        self.db_path = os.path.dirname(os.path.dirname(self.filename))
        self.defaults = defaults
        self.header = header
        self.data = {key: value["value"] for key, value in self.defaults.items()}
        if self.filename and os.path.exists(self.filename):
            # Read inside a context manager so the file handle is always closed.
            with open(self.filename, "rb") as config_file:
                source = config_file.read()
            # NOTE(security): the config file is executed as Python code with
            # this module's globals; it must only ever come from a trusted source.
            exec(compile(source, self.filename, "exec"), globals(), self.data)
        self.valid_config = True
        self.time_series_status = True
        self.converted = False

    def __getitem__(self, item):
        """Return the configured value, falling back to the default value."""
        try:
            return self.data[item]
        except KeyError:
            # Defaults store {"value", "comment"} entries; hand back the value
            # only, so both lookup paths return the same kind of object.
            return self.defaults[item]["value"]

    def __getattr__(self, key):
        """Expose config keys as attributes.

        Only called when normal attribute lookup fails. Unknown config keys
        still raise ``KeyError`` as before; missing special (dunder) names and
        the not-yet-initialised ``data``/``defaults`` attributes raise
        ``AttributeError`` so that ``hasattr``, ``copy`` and ``pickle`` work.
        """
        if key in ("data", "defaults"):
            # Reaching here means the instance is not initialised yet (e.g. it
            # is being copied/unpickled); going through __getitem__ would
            # recurse forever.
            raise AttributeError(key)
        try:
            return self[key]
        except KeyError:
            if key.startswith("__") and key.endswith("__"):
                raise AttributeError(key) from None
            raise

    def __setitem__(self, item, value):
        """Set (or override) a config value."""
        self.data[item] = value

    def __str__(self):
        """Render the config as Python source, with each key's comment above it."""
        string = "\n".join([line.strip() for line in self.header.splitlines() if line.strip()]) + "\n\n"
        written_keys = set()
        for key, value in self.defaults.items():
            if value["comment"]:
                string += "\n" + "\n".join(line.strip() for line in value["comment"].splitlines() if line.strip())
            # The citation-based keys are written as literal source text that
            # refers to the `citations` name instead of being pretty-printed.
            if key == "default_landing_page_browsing":
                string += """\ndefault_landing_page_browsing = [{"label": "Author", "group_by_field": "author",
                    "display_count": True, "queries": ["A-D", "E-I", "J-M", "N-R", "S-Z"], "is_range": True,
                    "citation": [citations["author"]],}, {"label": "Title", "group_by_field": "title",
                    "display_count": False, "queries": ["A-D", "E-I", "J-M", "N-R", "S-Z"], "is_range": True,
                    "citation": [citations["author"], citations["title"], citations["year"]]}]"""
            elif key == "concordance_citation":
                string += f"""\n{key} = [
                    citations["author"], citations["title"], citations["year"], citations["div1_head"],
                    citations["div1_date"], citations["div2_head"], citations["div2_date"], citations["div3_head"],
                    citations["div3_date"], citations["speaker"], citations["resp"], citations["page"],
                ]"""
            elif key == "bibliography_citation":
                string += f"""\n{key} = [
                    citations["author"], citations["title"], citations["year"],
                    citations["div1_head"], citations["div2_head"], citations["div3_head"],
                    citations["speaker"], citations["resp"], citations["page"],
                ]"""
            elif key in ("table_of_contents_citation", "navigation_citation", "simple_landing_citation"):
                string += f"""\n{key} = [
                    citations["author"], citations["title"], citations["year"],
                    citations["pub_place"], citations["publisher"], citations["collection"],
                ]"""
            elif key == "aggregation_config":
                string += (
                    f"\n{key} = "
                    + "[{"
                    + """"field": "author", "object_level": "doc", "field_citation": [citations["author"]],
                    "break_up_field": "title", "break_up_field_citation": [
                    citations["title"], citations["pub_place"], citations["publisher"], citations["collection"],citations["year"]
                    ],"""
                    + "}, {"
                    + """"field": "title", "object_level": "doc", "field_citation": [citations["title"],
                    citations["pub_place"], citations["publisher"], citations["collection"], citations["year"]],"break_up_field": None,
                    "break_up_field_citation": None, """
                    + "}]"
                )
            else:
                string += f"\n{key} = {pretty_print(self[key])}\n"
            written_keys.add(key)
        # Keys set by the config file or by callers that have no default entry.
        for key, value in self.data.items():
            if key not in written_keys:
                string += f"\n{key} = {pretty_print(value)}\n"
                written_keys.add(key)
        return string

    def to_json(self):
        """Convert Config to JSON representation.

        Keys appear in defaults order followed by any extra keys. When
        ``time_series_status`` is False, "time_series" is left out of
        ``search_reports`` in the output only; the stored config is untouched.
        """
        out_obj = {"valid_config": True}
        for key in self.defaults:
            out_obj[key] = self[key]
        # Adds the extra keys; keys already present keep their position.
        out_obj.update(self.data)
        if self.time_series_status is False:
            reports = out_obj.get("search_reports")
            if isinstance(reports, list):
                # Filter into a new list: removing in place would also alter
                # self.data and the shared defaults list.
                out_obj["search_reports"] = [report for report in reports if report != "time_series"]
        return json.dumps(out_obj)
922
+
923
+
924
def MakeWebConfig(path, **extra_values):
    """Build the web_config object for ``path``.

    Starts from the web defaults (plus whatever the file at ``path`` sets),
    then applies each keyword argument as an override.
    """
    config = Config(path, WEB_CONFIG_DEFAULTS, header=WEB_CONFIG_HEADER)
    # With no overrides the loop simply does nothing.
    for option, setting in extra_values.items():
        config[option] = setting
    return config
931
+
932
+
933
def MakeDBConfig(path, **extra_values):
    """Build the db.locals config object for ``path``.

    Starts from the db.locals defaults (plus whatever the file at ``path``
    sets), then applies each keyword argument as an override.
    """
    config = Config(path, DB_LOCALS_DEFAULTS, header=DB_LOCALS_HEADER)
    # With no overrides the loop simply does nothing.
    for option, setting in extra_values.items():
        config[option] = setting
    return config
940
+
941
+
942
if __name__ == "__main__":
    # Manual check: load the file given as first argument, using the web
    # defaults for paths ending in "cfg" and the db.locals defaults otherwise.
    config_path = sys.argv[1]
    chosen_defaults = WEB_CONFIG_DEFAULTS if config_path.endswith("cfg") else DB_LOCALS_DEFAULTS
    conf = Config(config_path, chosen_defaults)
    # print(conf, file=sys.stderr)