philologic 5.2.0.2__tar.gz → 5.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. {philologic-5.2.0.2 → philologic-5.2.2}/PKG-INFO +1 -1
  2. {philologic-5.2.0.2 → philologic-5.2.2}/philologic/Config.py +5 -1
  3. {philologic-5.2.0.2 → philologic-5.2.2}/philologic/runtime/DB.py +1 -1
  4. {philologic-5.2.0.2 → philologic-5.2.2}/philologic/runtime/MetadataQuery.py +1 -1
  5. {philologic-5.2.0.2 → philologic-5.2.2}/philologic/runtime/Query.py +12 -11
  6. {philologic-5.2.0.2 → philologic-5.2.2}/philologic/runtime/QuerySyntax.py +4 -2
  7. {philologic-5.2.0.2 → philologic-5.2.2}/philologic/runtime/__init__.py +9 -3
  8. {philologic-5.2.0.2 → philologic-5.2.2}/philologic/runtime/access_control.py +4 -4
  9. {philologic-5.2.0.2 → philologic-5.2.2}/philologic/runtime/reports/__init__.py +6 -6
  10. {philologic-5.2.0.2 → philologic-5.2.2}/philologic/runtime/reports/aggregation.py +42 -0
  11. {philologic-5.2.0.2 → philologic-5.2.2}/philologic/runtime/reports/bibliography.py +18 -0
  12. {philologic-5.2.0.2 → philologic-5.2.2}/philologic/runtime/reports/collocation.py +15 -0
  13. {philologic-5.2.0.2 → philologic-5.2.2}/philologic/runtime/reports/concordance.py +23 -0
  14. {philologic-5.2.0.2 → philologic-5.2.2}/philologic/runtime/reports/kwic.py +22 -0
  15. {philologic-5.2.0.2 → philologic-5.2.2}/philologic/runtime/reports/time_series.py +26 -19
  16. {philologic-5.2.0.2 → philologic-5.2.2}/philologic/runtime/term_expansion.py +101 -118
  17. philologic-5.2.2/philologic/utils/upgrade_gunicorn_conf.py +138 -0
  18. {philologic-5.2.0.2 → philologic-5.2.2}/philologic.egg-info/PKG-INFO +1 -1
  19. {philologic-5.2.0.2 → philologic-5.2.2}/philologic.egg-info/SOURCES.txt +2 -1
  20. {philologic-5.2.0.2 → philologic-5.2.2}/pyproject.toml +1 -1
  21. {philologic-5.2.0.2 → philologic-5.2.2}/LICENSE +0 -0
  22. {philologic-5.2.0.2 → philologic-5.2.2}/philologic/TagCensus.py +0 -0
  23. {philologic-5.2.0.2 → philologic-5.2.2}/philologic/__init__.py +0 -0
  24. {philologic-5.2.0.2 → philologic-5.2.2}/philologic/loadtime/LoadFilters.py +0 -0
  25. {philologic-5.2.0.2 → philologic-5.2.2}/philologic/loadtime/LoadOptions.py +0 -0
  26. {philologic-5.2.0.2 → philologic-5.2.2}/philologic/loadtime/Loader.py +0 -0
  27. {philologic-5.2.0.2 → philologic-5.2.2}/philologic/loadtime/OHCOVector.py +0 -0
  28. {philologic-5.2.0.2 → philologic-5.2.2}/philologic/loadtime/Parser.py +0 -0
  29. {philologic-5.2.0.2 → philologic-5.2.2}/philologic/loadtime/PlainTextParser.py +0 -0
  30. {philologic-5.2.0.2 → philologic-5.2.2}/philologic/loadtime/PostFilters.py +0 -0
  31. {philologic-5.2.0.2 → philologic-5.2.2}/philologic/loadtime/__init__.py +0 -0
  32. {philologic-5.2.0.2 → philologic-5.2.2}/philologic/loadtime/__main__.py +0 -0
  33. {philologic-5.2.0.2 → philologic-5.2.2}/philologic/runtime/FragmentParser.py +0 -0
  34. {philologic-5.2.0.2 → philologic-5.2.2}/philologic/runtime/HitList.py +0 -0
  35. {philologic-5.2.0.2 → philologic-5.2.2}/philologic/runtime/HitWrapper.py +0 -0
  36. {philologic-5.2.0.2 → philologic-5.2.2}/philologic/runtime/ObjectFormatter.py +0 -0
  37. {philologic-5.2.0.2 → philologic-5.2.2}/philologic/runtime/WSGIHandler.py +0 -0
  38. {philologic-5.2.0.2 → philologic-5.2.2}/philologic/runtime/citations.py +0 -0
  39. {philologic-5.2.0.2 → philologic-5.2.2}/philologic/runtime/exceptions.py +0 -0
  40. {philologic-5.2.0.2 → philologic-5.2.2}/philologic/runtime/find_similar_words.py +0 -0
  41. {philologic-5.2.0.2 → philologic-5.2.2}/philologic/runtime/get_text.py +0 -0
  42. {philologic-5.2.0.2 → philologic-5.2.2}/philologic/runtime/link.py +0 -0
  43. {philologic-5.2.0.2 → philologic-5.2.2}/philologic/runtime/multi_word_search.py +3 -3
  44. {philologic-5.2.0.2 → philologic-5.2.2}/philologic/runtime/pages.py +0 -0
  45. {philologic-5.2.0.2 → philologic-5.2.2}/philologic/runtime/reports/frequency.py +0 -0
  46. {philologic-5.2.0.2 → philologic-5.2.2}/philologic/runtime/reports/generate_word_frequency.py +0 -0
  47. {philologic-5.2.0.2 → philologic-5.2.2}/philologic/runtime/reports/landing_page.py +0 -0
  48. {philologic-5.2.0.2 → philologic-5.2.2}/philologic/runtime/reports/navigation.py +0 -0
  49. {philologic-5.2.0.2 → philologic-5.2.2}/philologic/runtime/reports/table_of_contents.py +0 -0
  50. {philologic-5.2.0.2 → philologic-5.2.2}/philologic/runtime/sql_validation.py +0 -0
  51. {philologic-5.2.0.2 → philologic-5.2.2}/philologic/runtime/web_config.py +0 -0
  52. {philologic-5.2.0.2 → philologic-5.2.2}/philologic/shlax.py +0 -0
  53. {philologic-5.2.0.2 → philologic-5.2.2}/philologic/shlaxtree.py +0 -0
  54. {philologic-5.2.0.2 → philologic-5.2.2}/philologic/utils/__init__.py +0 -0
  55. {philologic-5.2.0.2 → philologic-5.2.2}/philologic/utils/convert_entities.py +0 -0
  56. {philologic-5.2.0.2 → philologic-5.2.2}/philologic/utils/line_count.py +0 -0
  57. {philologic-5.2.0.2 → philologic-5.2.2}/philologic/utils/load_module.py +0 -0
  58. {philologic-5.2.0.2 → philologic-5.2.2}/philologic/utils/metadata_type_handler.py +0 -0
  59. {philologic-5.2.0.2 → philologic-5.2.2}/philologic/utils/pretty_print.py +0 -0
  60. {philologic-5.2.0.2 → philologic-5.2.2}/philologic/utils/sort.py +0 -0
  61. {philologic-5.2.0.2 → philologic-5.2.2}/philologic.egg-info/dependency_links.txt +0 -0
  62. {philologic-5.2.0.2 → philologic-5.2.2}/philologic.egg-info/requires.txt +0 -0
  63. {philologic-5.2.0.2 → philologic-5.2.2}/philologic.egg-info/top_level.txt +0 -0
  64. {philologic-5.2.0.2 → philologic-5.2.2}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: philologic
3
- Version: 5.2.0.2
3
+ Version: 5.2.2
4
4
  Summary: A concordance search engine for TEI-XML
5
5
  Author-email: Clovis Gladstone <clovisgladstone@artfl.uchicago.edu>
6
6
  License-Expression: GPL-3.0-or-later
@@ -164,7 +164,11 @@ DB_LOCALS_DEFAULTS = {
164
164
  "overflow_words": {
165
165
  "value": set(),
166
166
  "comment": "# The overflow_words variable is a set of words which are not indexed in the database, but stored as blobs in the data/overflow_words directory.",
167
- }
167
+ },
168
+ "query_patterns": {
169
+ "value": None,
170
+ "comment": "# Custom query tokenization patterns. When set, overrides the default patterns in QuerySyntax.parse_query.\n# Must be a list of (label, regex) tuples, e.g. [(\"TERM\", r'[^\\s\"]+'), ...].\n# When None, the built-in default patterns are used.",
171
+ },
168
172
  }
169
173
  DB_LOCALS_HEADER = """
170
174
  #########################################################\n
@@ -269,7 +269,7 @@ class DB:
269
269
  raw_bytes=raw_bytes,
270
270
  ascii_conversion=self.locals.ascii_conversion,
271
271
  )
272
- parsed = QuerySyntax.parse_query(qs)
272
+ parsed = QuerySyntax.parse_query(qs, query_patterns=self.locals.query_patterns)
273
273
  grouped = QuerySyntax.group_terms(parsed)
274
274
  split = Query.split_terms(grouped)
275
275
  words_per_hit = len(split)
@@ -200,7 +200,7 @@ def query_lowlevel(db, param_dict, sort_order, ascii_conversion):
200
200
  for v in values:
201
201
  parsed = "text"
202
202
  if db.locals.metadata_sql_types[column] in ("text", "int"):
203
- parsed = parse_query(v)
203
+ parsed = parse_query(v, query_patterns=db.locals.query_patterns)
204
204
  elif db.locals.metadata_sql_types[column] == "date":
205
205
  v = v.replace('"', "") # remove quotes
206
206
  parsed = parse_date_query(v)
@@ -7,24 +7,25 @@ import threading
7
7
  from bisect import bisect_left, bisect_right
8
8
  from pathlib import Path
9
9
 
10
+ # Set Numba cache directory BEFORE importing numba — otherwise Numba resolves
11
+ # its cache locator using the default (write next to source file), which fails
12
+ # when the source is in a read-only site-packages directory.
13
+ _cache_dir = os.environ.get("NUMBA_CACHE_DIR", "/var/lib/philologic5/numba_cache")
14
+ if not os.access(_cache_dir, os.W_OK):
15
+ _cache_dir = f"/tmp/philologic_numba_cache_{os.getuid()}"
16
+ os.makedirs(_cache_dir, mode=0o755, exist_ok=True)
17
+ os.environ["NUMBA_CACHE_DIR"] = _cache_dir
18
+
10
19
  import lmdb
11
20
  import numba
12
21
  import numpy as np
13
22
  import regex as re
14
23
 
24
+ numba.config.CACHE_DIR = _cache_dir
25
+
15
26
  from philologic.runtime import HitList
16
27
  from philologic.runtime.QuerySyntax import group_terms, parse_query
17
28
 
18
- # Set Numba cache directory
19
- # Try shared cache first, fall back to /tmp if permission denied
20
- cache_dir = "/var/lib/philologic5/numba_cache"
21
- if not os.access(cache_dir, os.W_OK):
22
- # In hardened containers, use per-user temp cache
23
- cache_dir = f"/tmp/philologic_numba_cache_{os.getuid()}"
24
- os.makedirs(cache_dir, mode=0o755, exist_ok=True)
25
- os.environ["NUMBA_CACHE_DIR"] = cache_dir
26
- numba.config.CACHE_DIR = cache_dir
27
-
28
29
 
29
30
  @numba.jit(nopython=True, cache=True, nogil=True)
30
31
  def _merge_two_sorted_arrays(arr1, arr2):
@@ -455,7 +456,7 @@ def query(
455
456
  ):
456
457
  """Runs concordance queries"""
457
458
  sys.stdout.flush()
458
- parsed = parse_query(terms)
459
+ parsed = parse_query(terms, query_patterns=db.locals.query_patterns)
459
460
  grouped = group_terms(parsed)
460
461
  split = split_terms(grouped)
461
462
  words_per_hit = len(split)
@@ -31,12 +31,14 @@ date_patterns = [
31
31
  ]
32
32
 
33
33
 
34
- def parse_query(qstring):
34
+ def parse_query(qstring, query_patterns=None):
35
35
  """Parse query"""
36
+ if query_patterns is None:
37
+ query_patterns = patterns
36
38
  buf = qstring[:]
37
39
  parsed = []
38
40
  while len(buf) > 0:
39
- for label, pattern in patterns:
41
+ for label, pattern in query_patterns:
40
42
  m = re.match(pattern, buf)
41
43
  if m:
42
44
  parsed.append((label, m.group()))
@@ -7,21 +7,27 @@ from philologic.runtime.get_text import get_concordance_text, get_tei_header
7
7
  from philologic.runtime.pages import page_interval
8
8
  from philologic.runtime.Query import parse_query
9
9
  from philologic.runtime.reports import (
10
+ aggregation_by_field,
11
+ aggregation_to_csv,
10
12
  bibliography_results,
13
+ bibliography_to_csv,
11
14
  collocation_results,
15
+ collocation_to_csv,
12
16
  concordance_results,
17
+ concordance_to_csv,
13
18
  frequency_results,
14
19
  generate_text_object,
15
20
  generate_time_series,
16
21
  generate_toc_object,
17
22
  generate_word_frequency,
18
23
  get_start_end_date,
19
- kwic_hit_object,
20
- kwic_results,
21
24
  group_by_metadata,
22
25
  group_by_range,
26
+ kwic_hit_object,
27
+ kwic_results,
28
+ kwic_to_csv,
23
29
  landing_page_bibliography,
24
- aggregation_by_field,
30
+ time_series_to_csv,
25
31
  )
26
32
  from philologic.runtime.web_config import WebConfig
27
33
  from philologic.runtime.WSGIHandler import WSGIHandler
@@ -309,15 +309,15 @@ def login_access(environ, request, config, headers):
309
309
  token = make_token(db)
310
310
  if token:
311
311
  h, ts = token
312
- headers.append(("Set-Cookie", f"hash={h}"))
313
- headers.append(("Set-Cookie", f"timestamp={ts}"))
312
+ headers.append(("Set-Cookie", f"hash={h}; Path=/"))
313
+ headers.append(("Set-Cookie", f"timestamp={ts}; Path=/"))
314
314
  else:
315
315
  # WORKAROUND because cookie not being sent on access_request.py request
316
316
  token = check_access(environ, config)
317
317
  if token:
318
318
  h, ts = token
319
- headers.append(("Set-Cookie", f"hash={h}"))
320
- headers.append(("Set-Cookie", f"timestamp={ts}"))
319
+ headers.append(("Set-Cookie", f"hash={h}; Path=/"))
320
+ headers.append(("Set-Cookie", f"timestamp={ts}; Path=/"))
321
321
  access = True
322
322
  else:
323
323
  access = False
@@ -1,13 +1,13 @@
1
1
  """Report exports"""
2
2
 
3
- from philologic.runtime.reports.concordance import concordance_results
4
- from philologic.runtime.reports.bibliography import bibliography_results
5
- from philologic.runtime.reports.time_series import generate_time_series, get_start_end_date
3
+ from philologic.runtime.reports.concordance import concordance_results, concordance_to_csv
4
+ from philologic.runtime.reports.bibliography import bibliography_results, bibliography_to_csv
5
+ from philologic.runtime.reports.time_series import generate_time_series, get_start_end_date, time_series_to_csv
6
6
  from philologic.runtime.reports.navigation import generate_text_object
7
7
  from philologic.runtime.reports.table_of_contents import generate_toc_object
8
- from philologic.runtime.reports.kwic import kwic_results, kwic_hit_object
8
+ from philologic.runtime.reports.kwic import kwic_results, kwic_hit_object, kwic_to_csv
9
9
  from philologic.runtime.reports.generate_word_frequency import generate_word_frequency
10
10
  from philologic.runtime.reports.frequency import frequency_results
11
- from philologic.runtime.reports.collocation import collocation_results
11
+ from philologic.runtime.reports.collocation import collocation_results, collocation_to_csv
12
12
  from philologic.runtime.reports.landing_page import landing_page_bibliography, group_by_range, group_by_metadata
13
- from philologic.runtime.reports.aggregation import aggregation_by_field
13
+ from philologic.runtime.reports.aggregation import aggregation_by_field, aggregation_to_csv
@@ -1,6 +1,9 @@
1
1
  # /usr/bin/env python3
2
2
  """Report designed to group results by metadata with additional breakdown optional"""
3
3
 
4
+ import csv
5
+ import io
6
+
4
7
  import numpy as np
5
8
 
6
9
  from philologic.runtime.DB import DB
@@ -137,6 +140,45 @@ def aggregation_by_field(request, config):
137
140
  }
138
141
 
139
142
 
143
+ def aggregation_to_csv(results, break_up_field_name=""):
144
+ """Convert aggregation results to CSV string.
145
+
146
+ Each breakdown entry gets its own row. Rows from the same group
147
+ are contiguous, with the group-level metadata repeated.
148
+ """
149
+ if not results:
150
+ return ""
151
+ output = io.StringIO()
152
+ first = results[0]
153
+ group_keys = sorted(k for k in first["metadata_fields"].keys() if k not in ("field_name", "philo_id"))
154
+ has_breakdown = break_up_field_name and any(r["break_up_field"] for r in results)
155
+ if has_breakdown:
156
+ # Collect all metadata keys from breakdown entries
157
+ breakdown_keys = set()
158
+ for result in results:
159
+ for sub in result["break_up_field"]:
160
+ breakdown_keys.update(k for k in sub["metadata_fields"].keys() if k not in ("field_name", "philo_id"))
161
+ breakdown_keys = sorted(breakdown_keys - set(group_keys))
162
+ fieldnames = group_keys + ["group_count"] + breakdown_keys + ["count"]
163
+ else:
164
+ fieldnames = group_keys + ["count"]
165
+ writer = csv.DictWriter(output, fieldnames=fieldnames)
166
+ writer.writeheader()
167
+ for result in results:
168
+ group_fields = {k: result["metadata_fields"].get(k, "") for k in group_keys}
169
+ if has_breakdown and result["break_up_field"]:
170
+ for sub in result["break_up_field"]:
171
+ row = {**group_fields, "group_count": result["count"]}
172
+ for k in breakdown_keys:
173
+ row[k] = sub["metadata_fields"].get(k, "")
174
+ row["count"] = sub["count"]
175
+ writer.writerow(row)
176
+ else:
177
+ row = {**group_fields, "count": result["count"]}
178
+ writer.writerow(row)
179
+ return output.getvalue()
180
+
181
+
140
182
  def __expand_hits_counted(hits, metadata_type):
141
183
  """Stream sorted hitlist with numpy, return per-ID hit counts.
142
184
 
@@ -1,6 +1,8 @@
1
1
  #!/var/lib/philologic5/philologic_env/bin/python3
2
2
  """Bibliography results"""
3
3
 
4
+ import csv
5
+ import io
4
6
 
5
7
  from philologic.runtime.citations import citation_links, citations
6
8
  from philologic.runtime.DB import DB
@@ -77,3 +79,19 @@ def bibliography_results(request, config):
77
79
  bibliography_object["query_done"] = hits.done
78
80
  bibliography_object["result_type"] = result_type
79
81
  return bibliography_object, hits
82
+
83
+
84
+ def bibliography_to_csv(results):
85
+ """Convert bibliography results to CSV string."""
86
+ if not results:
87
+ return ""
88
+ output = io.StringIO()
89
+ metadata_keys = sorted(results[0]["metadata_fields"].keys())
90
+ fieldnames = ["philo_id"] + metadata_keys
91
+ writer = csv.DictWriter(output, fieldnames=fieldnames)
92
+ writer.writeheader()
93
+ for result in results:
94
+ row = {"philo_id": " ".join(str(x) for x in result["philo_id"])}
95
+ row.update(result["metadata_fields"])
96
+ writer.writerow(row)
97
+ return output.getvalue()
@@ -580,6 +580,21 @@ def collocation_results(request, config):
580
580
  return collocation_object
581
581
 
582
582
 
583
+ def collocation_to_csv(collocates):
584
+ """Convert collocation results (list of (word, count) tuples) to CSV string."""
585
+ import csv
586
+ import io
587
+
588
+ if not collocates:
589
+ return ""
590
+ output = io.StringIO()
591
+ writer = csv.DictWriter(output, fieldnames=["collocate", "count"])
592
+ writer.writeheader()
593
+ for word, count in collocates:
594
+ writer.writerow({"collocate": word, "count": count})
595
+ return output.getvalue()
596
+
597
+
583
598
  def atomic_pickle_dump(data, file_path):
584
599
  """Write pickle atomically to prevent truncated reads from concurrent requests."""
585
600
  dir_path = os.path.dirname(file_path)
@@ -1,6 +1,9 @@
1
1
  #!/var/lib/philologic5/philologic_env/bin/python3
2
2
  """Concordance report"""
3
3
 
4
+ import csv
5
+ import io
6
+
4
7
  import regex as re
5
8
  from philologic.runtime.citations import citation_links, citations
6
9
  from philologic.runtime.DB import DB
@@ -60,3 +63,23 @@ def concordance_results(request, config):
60
63
  concordance_object["results_length"] = len(hits)
61
64
  concordance_object["query_done"] = hits.done
62
65
  return concordance_object
66
+
67
+
68
+ def concordance_to_csv(results, filter_html=False):
69
+ """Convert concordance results to CSV string."""
70
+ if not results:
71
+ return ""
72
+ tags_re = re.compile(r"<[^>]+>")
73
+ output = io.StringIO()
74
+ metadata_keys = sorted(results[0]["metadata_fields"].keys())
75
+ fieldnames = ["philo_id", "context"] + metadata_keys
76
+ writer = csv.DictWriter(output, fieldnames=fieldnames)
77
+ writer.writeheader()
78
+ for result in results:
79
+ context = result["context"]
80
+ if filter_html:
81
+ context = tags_re.sub("", context).strip()
82
+ row = {"philo_id": " ".join(str(x) for x in result["philo_id"]), "context": context}
83
+ row.update(result["metadata_fields"])
84
+ writer.writerow(row)
85
+ return output.getvalue()
@@ -1,6 +1,8 @@
1
1
  #!/var/lib/philologic5/philologic_env/bin/python3
2
2
  """KWIC results"""
3
3
 
4
+ import csv
5
+ import io
4
6
 
5
7
  import regex as re
6
8
  from philologic.runtime.citations import citation_links, citations
@@ -92,3 +94,23 @@ def kwic_hit_object(hit, config, db):
92
94
  }
93
95
 
94
96
  return kwic_result
97
+
98
+
99
+ def kwic_to_csv(results, filter_html=False):
100
+ """Convert KWIC results to CSV string."""
101
+ if not results:
102
+ return ""
103
+ tags_re = re.compile(r"<[^>]+>")
104
+ output = io.StringIO()
105
+ metadata_keys = sorted(results[0]["metadata_fields"].keys())
106
+ fieldnames = ["philo_id", "context"] + metadata_keys
107
+ writer = csv.DictWriter(output, fieldnames=fieldnames)
108
+ writer.writeheader()
109
+ for result in results:
110
+ context = result["context"]
111
+ if filter_html:
112
+ context = tags_re.sub("", context).strip()
113
+ row = {"philo_id": " ".join(str(x) for x in result["philo_id"]), "context": context}
114
+ row.update(result["metadata_fields"])
115
+ writer.writerow(row)
116
+ return output.getvalue()
@@ -2,7 +2,6 @@
2
2
  """Time series"""
3
3
 
4
4
  import os
5
- import time
6
5
 
7
6
  import numba
8
7
  import numpy as np
@@ -96,7 +95,6 @@ def _bucket_hits_by_year(doc_ids, year_array, start_date, interval, n_ranges):
96
95
 
97
96
 
98
97
  def generate_time_series(request, config):
99
- t0 = time.time()
100
98
  db = DB(config.db_path + "/data/")
101
99
  year_field = validate_column(config.time_series_year_field, db)
102
100
  time_series_object = {"query": dict([i for i in request]), "query_done": False}
@@ -109,25 +107,24 @@ def generate_time_series(request, config):
109
107
  time_series_object["results"] = {"absolute_count": {}, "date_count": {}}
110
108
  return time_series_object
111
109
 
112
- interval = int(request.year_interval)
110
+ try:
111
+ interval = int(request.year_interval)
112
+ except (ValueError, TypeError):
113
+ interval = int(config.time_series_interval)
113
114
 
114
115
  # Get cached doc→year mapping (SQL only on first request per worker)
115
- t1 = time.time()
116
116
  year_array, year_word_counts, year_doc_counts, min_date, max_date = _get_doc_year_data(db, year_field)
117
- print(f"[time_series] doc year data: {time.time()-t1:.3f}s", flush=True)
118
117
 
119
118
  # Resolve start/end dates
120
119
  start_date = int(request.start_date) if request.start_date else min_date
121
120
  end_date = int(request.end_date) if request.end_date else max_date
122
121
 
123
122
  # Fire the word query now that we have start/end dates
124
- t1 = time.time()
125
123
  hits = None
126
124
  if request.q:
127
125
  metadata = dict(request.metadata)
128
126
  metadata[year_field] = "%d-%d" % (start_date, end_date)
129
127
  hits = db.query(request["q"], request["method"], request["arg"], raw_results=True, **metadata)
130
- print(f"[time_series] db.query dispatch: {time.time()-t1:.3f}s", flush=True)
131
128
 
132
129
  # Generate date ranges for output
133
130
  date_ranges = []
@@ -150,27 +147,18 @@ def generate_time_series(request, config):
150
147
 
151
148
  # Absolute hit counts: wait for search, then vectorized bucketing
152
149
  if hits is not None:
153
- t1 = time.time()
154
150
  hits.finish()
155
- t_finish = time.time() - t1
156
151
  total_hits = len(hits)
157
- print(f"[time_series] hits.finish() wait ({total_hits} hits): {t_finish:.3f}s", flush=True)
158
152
 
159
153
  if total_hits > 0:
160
- t1 = time.time()
161
154
  hit_length = hits.length
162
155
  mm = np.memmap(hits.filename, dtype="u4", mode="r").reshape(-1, hit_length)
163
156
  doc_ids = np.ascontiguousarray(mm[:, 0])
164
157
  del mm # release mmap immediately
165
- t_read = time.time() - t1
166
158
 
167
- # Single-pass JIT on contiguous doc_id column
168
- t1 = time.time()
169
159
  bin_counts, total_hits = _bucket_hits_by_year(
170
160
  doc_ids, year_array, start_date, interval, n_ranges
171
161
  )
172
- t_jit = time.time() - t1
173
- print(f"[time_series] mmap+extract doc_ids: {t_read:.3f}s, JIT bucket: {t_jit:.3f}s ({total_hits} hits in {n_ranges} bins)", flush=True)
174
162
  else:
175
163
  bin_counts = np.zeros(n_ranges, dtype=np.int64)
176
164
  else:
@@ -182,7 +170,6 @@ def generate_time_series(request, config):
182
170
  total_hits += int(bin_counts[i])
183
171
 
184
172
  # Build absolute_count output matching expected format
185
- t1 = time.time()
186
173
  absolute_count = {}
187
174
  for i, (range_start, date_range) in enumerate(date_ranges):
188
175
  params = {"report": "concordance", "start": "0", "end": "0"}
@@ -193,7 +180,6 @@ def generate_time_series(request, config):
193
180
  "count": int(bin_counts[i]),
194
181
  "url": url,
195
182
  }
196
- print(f"[time_series] build output ({n_ranges} ranges): {time.time()-t1:.3f}s", flush=True)
197
183
 
198
184
  time_series_object["results_length"] = int(total_hits)
199
185
  time_series_object["more_results"] = False
@@ -202,10 +188,31 @@ def generate_time_series(request, config):
202
188
  "date_count": {str(date): count for date, count in date_counts.items()},
203
189
  }
204
190
 
205
- print(f"[time_series] TOTAL: {time.time()-t0:.3f}s", flush=True)
206
191
  return time_series_object
207
192
 
208
193
 
194
+ def time_series_to_csv(results):
195
+ """Convert time series results to CSV string."""
196
+ import csv
197
+ import io
198
+
199
+ absolute_count = results.get("absolute_count", {})
200
+ date_count = results.get("date_count", {})
201
+ if not absolute_count:
202
+ return ""
203
+ output = io.StringIO()
204
+ writer = csv.DictWriter(output, fieldnames=["period", "count", "total_words"])
205
+ writer.writeheader()
206
+ for period_start in sorted(absolute_count.keys(), key=int):
207
+ entry = absolute_count[period_start]
208
+ writer.writerow({
209
+ "period": entry["label"],
210
+ "count": entry["count"],
211
+ "total_words": date_count.get(period_start, ""),
212
+ })
213
+ return output.getvalue()
214
+
215
+
209
216
  def get_start_end_date(db, config, start_date=None, end_date=None):
210
217
  """Get start and end date of dataset"""
211
218
  year_field = validate_column(config.time_series_year_field, db)
@@ -12,45 +12,13 @@ import regex as re
12
12
  from unidecode import unidecode
13
13
 
14
14
 
15
- # Process-level cache: one LMDB env per lmdb_path, kept open for the
16
- # lifetime of the worker process (avoids repeated open/close overhead).
17
- _norm_lmdb_cache: dict[str, lmdb.Environment] = {}
18
- # db_paths for which word_forms.lmdb is absent (no lemma/attr flat files)
19
- _no_forms_lmdb: set[str] = set()
20
-
21
15
  # Flat files (in frequencies/) that feed word_forms.lmdb
22
16
  _FORMS_FLAT_FILES = ("lemmas", "word_attributes", "lemma_word_attributes")
23
17
 
24
18
 
25
- def get_lmdb_env(lmdb_path: str) -> lmdb.Environment:
26
- """Return (and cache) a read-only LMDB environment for the given path."""
27
- env = _norm_lmdb_cache.get(lmdb_path)
28
- if env is not None:
29
- return env
30
- env = lmdb.open(lmdb_path, readonly=True, lock=False, readahead=False, max_spare_txns=4)
31
- _norm_lmdb_cache[lmdb_path] = env
32
- return env
33
-
34
-
35
- def _get_norm_env(freq_file: str) -> lmdb.Environment:
36
- """Return (and cache) the norm_word.lmdb env (built at index time by PostFilters)."""
37
- return get_lmdb_env(freq_file + ".lmdb")
38
-
39
-
40
- def _get_forms_env(db_path: str) -> lmdb.Environment | None:
41
- """Return (and cache) the word_forms.lmdb env (built at index time by PostFilters).
42
-
43
- Returns None if the database has no word_forms.lmdb (no lemma/attr data).
44
- """
45
- lmdb_path = os.path.join(db_path, "frequencies", "word_forms.lmdb")
46
- if lmdb_path in _norm_lmdb_cache:
47
- return _norm_lmdb_cache[lmdb_path]
48
- if db_path in _no_forms_lmdb:
49
- return None
50
- if not os.path.exists(lmdb_path):
51
- _no_forms_lmdb.add(db_path)
52
- return None
53
- return get_lmdb_env(lmdb_path)
19
+ def _open_lmdb(lmdb_path: str) -> lmdb.Environment:
20
+ """Open a read-only LMDB environment. Caller should close it when done."""
21
+ return lmdb.open(lmdb_path, readonly=True, lock=False, readahead=False)
54
22
 
55
23
 
56
24
  def _norm_key(token: str, lowercase: bool = True) -> bytes:
@@ -258,9 +226,10 @@ def expand_query_not(split, freq_file, dest_fh, ascii_conversion, lowercase=True
258
226
  forms, and writes the result to dest_fh.
259
227
  Groups are separated by blank lines (consumed by get_word_groups()).
260
228
  """
261
- env = _get_norm_env(freq_file)
229
+ env = _open_lmdb(freq_file + ".lmdb")
262
230
  db_path = os.path.normpath(os.path.join(os.path.dirname(freq_file), ".."))
263
- forms_env = _get_forms_env(db_path)
231
+ forms_lmdb_path = os.path.join(db_path, "frequencies", "word_forms.lmdb")
232
+ forms_env = _open_lmdb(forms_lmdb_path) if os.path.exists(forms_lmdb_path) else None
264
233
  first = True
265
234
 
266
235
  with env.begin(buffers=True) as txn:
@@ -303,6 +272,9 @@ def expand_query_not(split, freq_file, dest_fh, ascii_conversion, lowercase=True
303
272
  dest_fh.write(form + "\n")
304
273
  except TypeError:
305
274
  dest_fh.write((form + "\n").encode("utf-8"))
275
+ env.close()
276
+ if forms_env is not None:
277
+ forms_env.close()
306
278
 
307
279
 
308
280
  # ── Metadata inverted word index ──────────────────────────────────────────────
@@ -364,24 +336,22 @@ def build_metadata_word_index(db_path: str) -> int:
364
336
  return len(index)
365
337
 
366
338
 
367
- def _get_metadata_index_env(db_path: str) -> lmdb.Environment:
368
- """Return (and cache) the metadata_word_index.lmdb env (built at index time by PostFilters)."""
369
- lmdb_path = os.path.join(db_path, "frequencies", _META_LMDB_NAME)
370
- return get_lmdb_env(lmdb_path)
371
-
372
339
 
373
340
  def metadata_word_lookup(db_path: str, field: str, term: str) -> list[str]:
374
341
  """Look up metadata values containing term as a whole word.
375
342
 
376
343
  Returns list of original metadata values from the inverted word index.
377
344
  """
378
- env = _get_metadata_index_env(db_path)
379
- key = f"{field}\x00{term}".encode("utf-8")
380
- with env.begin(buffers=True) as txn:
381
- val = txn.get(key)
382
- if val is None:
383
- return []
384
- return bytes(val).decode("utf-8").split("\x00")
345
+ env = _open_lmdb(os.path.join(db_path, "frequencies", _META_LMDB_NAME))
346
+ try:
347
+ key = f"{field}\x00{term}".encode("utf-8")
348
+ with env.begin(buffers=True) as txn:
349
+ val = txn.get(key)
350
+ if val is None:
351
+ return []
352
+ return bytes(val).decode("utf-8").split("\x00")
353
+ finally:
354
+ env.close()
385
355
 
386
356
 
387
357
  def metadata_word_regex_scan(db_path: str, field: str, pattern: str) -> list[str]:
@@ -391,31 +361,34 @@ def metadata_word_regex_scan(db_path: str, field: str, pattern: str) -> list[str
391
361
  indexed word. Returns deduplicated list of original metadata values
392
362
  from all matching words.
393
363
  """
394
- env = _get_metadata_index_env(db_path)
395
- field_prefix = f"{field}\x00".encode("utf-8")
396
- compiled = re.compile(pattern)
397
- seen: set[str] = set()
398
- results: list[str] = []
399
- with env.begin(buffers=True) as txn:
400
- cursor = txn.cursor()
401
- try:
402
- if not cursor.set_range(field_prefix):
403
- return results
404
- while True:
405
- k = bytes(cursor.key())
406
- if not k.startswith(field_prefix):
407
- break
408
- word = k[len(field_prefix):].decode("utf-8", errors="replace")
409
- if compiled.search(word):
410
- for val in bytes(cursor.value()).decode("utf-8").split("\x00"):
411
- if val not in seen:
412
- seen.add(val)
413
- results.append(val)
414
- if not cursor.next():
415
- break
416
- finally:
417
- cursor.close()
418
- return results
364
+ env = _open_lmdb(os.path.join(db_path, "frequencies", _META_LMDB_NAME))
365
+ try:
366
+ field_prefix = f"{field}\x00".encode("utf-8")
367
+ compiled = re.compile(pattern)
368
+ seen: set[str] = set()
369
+ results: list[str] = []
370
+ with env.begin(buffers=True) as txn:
371
+ cursor = txn.cursor()
372
+ try:
373
+ if not cursor.set_range(field_prefix):
374
+ return results
375
+ while True:
376
+ k = bytes(cursor.key())
377
+ if not k.startswith(field_prefix):
378
+ break
379
+ word = k[len(field_prefix):].decode("utf-8", errors="replace")
380
+ if compiled.search(word):
381
+ for val in bytes(cursor.value()).decode("utf-8").split("\x00"):
382
+ if val not in seen:
383
+ seen.add(val)
384
+ results.append(val)
385
+ if not cursor.next():
386
+ break
387
+ finally:
388
+ cursor.close()
389
+ return results
390
+ finally:
391
+ env.close()
419
392
 
420
393
 
421
394
  def metadata_word_prefix_scan(db_path: str, field: str, prefix: str,
@@ -425,30 +398,33 @@ def metadata_word_prefix_scan(db_path: str, field: str, prefix: str,
425
398
  Returns deduplicated list of original metadata values from all matching words.
426
399
  Used for metadata autocomplete.
427
400
  """
428
- env = _get_metadata_index_env(db_path)
429
- key_prefix = f"{field}\x00{prefix}".encode("utf-8")
430
- seen: set[str] = set()
431
- results: list[str] = []
432
- with env.begin(buffers=True) as txn:
433
- cursor = txn.cursor()
434
- try:
435
- if not cursor.set_range(key_prefix):
436
- return results
437
- while True:
438
- k = bytes(cursor.key())
439
- if not k.startswith(key_prefix):
440
- break
441
- for val in bytes(cursor.value()).decode("utf-8").split("\x00"):
442
- if val not in seen:
443
- seen.add(val)
444
- results.append(val)
445
- if len(results) >= max_results:
446
- return results
447
- if not cursor.next():
448
- break
449
- finally:
450
- cursor.close()
451
- return results
401
+ env = _open_lmdb(os.path.join(db_path, "frequencies", _META_LMDB_NAME))
402
+ try:
403
+ key_prefix = f"{field}\x00{prefix}".encode("utf-8")
404
+ seen: set[str] = set()
405
+ results: list[str] = []
406
+ with env.begin(buffers=True) as txn:
407
+ cursor = txn.cursor()
408
+ try:
409
+ if not cursor.set_range(key_prefix):
410
+ return results
411
+ while True:
412
+ k = bytes(cursor.key())
413
+ if not k.startswith(key_prefix):
414
+ break
415
+ for val in bytes(cursor.value()).decode("utf-8").split("\x00"):
416
+ if val not in seen:
417
+ seen.add(val)
418
+ results.append(val)
419
+ if len(results) >= max_results:
420
+ return results
421
+ if not cursor.next():
422
+ break
423
+ finally:
424
+ cursor.close()
425
+ return results
426
+ finally:
427
+ env.close()
452
428
 
453
429
 
454
430
  def expand_autocomplete(kind: str, token: str, frequency_file: str, db_path: str,
@@ -469,29 +445,36 @@ def expand_autocomplete(kind: str, token: str, frequency_file: str, db_path: str
469
445
  raw_token = token[1:-1] if kind == "QUOTE" else token
470
446
  if not raw_token:
471
447
  return []
472
- env = _get_norm_env(frequency_file)
473
- with env.begin(buffers=True) as txn:
474
- if _is_regex_pattern(raw_token):
475
- norm_prefix, pattern_str = _normalize_pattern(raw_token, lowercase and ascii_conversion)
476
- return _lmdb_expand_term(txn, norm_prefix, pattern_str, max_results)
477
- elif ascii_conversion:
478
- norm_prefix = _norm_key(raw_token, lowercase)
479
- return _lmdb_expand_term(txn, norm_prefix, None, max_results)
480
- else:
481
- # ascii_conversion=False: query token is the norm key as-is
482
- norm_prefix = raw_token.lower().encode("utf-8") if lowercase else raw_token.encode("utf-8")
483
- return _lmdb_expand_term(txn, norm_prefix, None, max_results)
448
+ env = _open_lmdb(frequency_file + ".lmdb")
449
+ try:
450
+ with env.begin(buffers=True) as txn:
451
+ if _is_regex_pattern(raw_token):
452
+ norm_prefix, pattern_str = _normalize_pattern(raw_token, lowercase and ascii_conversion)
453
+ return _lmdb_expand_term(txn, norm_prefix, pattern_str, max_results)
454
+ elif ascii_conversion:
455
+ norm_prefix = _norm_key(raw_token, lowercase)
456
+ return _lmdb_expand_term(txn, norm_prefix, None, max_results)
457
+ else:
458
+ # ascii_conversion=False: query token is the norm key as-is
459
+ norm_prefix = raw_token.lower().encode("utf-8") if lowercase else raw_token.encode("utf-8")
460
+ return _lmdb_expand_term(txn, norm_prefix, None, max_results)
461
+ finally:
462
+ env.close()
484
463
 
485
464
  elif kind in ("LEMMA", "ATTR", "LEMMA_ATTR"):
486
465
  if not token:
487
466
  return []
488
- scan_env = _get_forms_env(db_path) or get_lmdb_env(os.path.join(db_path, "words.lmdb"))
489
- with scan_env.begin(buffers=True) as txn:
490
- if _is_regex_pattern(token):
491
- literal, meta = _split_literal_prefix(token)
492
- prefix_bytes = literal.encode("utf-8")
493
- return _lmdb_scan_keys(txn, prefix_bytes, literal + meta, max_results)
494
- else:
495
- return _lmdb_scan_keys(txn, token.encode("utf-8"), None, max_results)
467
+ forms_lmdb_path = os.path.join(db_path, "frequencies", "word_forms.lmdb")
468
+ scan_env = _open_lmdb(forms_lmdb_path) if os.path.exists(forms_lmdb_path) else _open_lmdb(os.path.join(db_path, "words.lmdb"))
469
+ try:
470
+ with scan_env.begin(buffers=True) as txn:
471
+ if _is_regex_pattern(token):
472
+ literal, meta = _split_literal_prefix(token)
473
+ prefix_bytes = literal.encode("utf-8")
474
+ return _lmdb_scan_keys(txn, prefix_bytes, literal + meta, max_results)
475
+ else:
476
+ return _lmdb_scan_keys(txn, token.encode("utf-8"), None, max_results)
477
+ finally:
478
+ scan_env.close()
496
479
 
497
480
  return []
@@ -0,0 +1,138 @@
1
+ """Upgrade gunicorn.conf.py while preserving user customizations.
2
+
3
+ Compares the old installed gunicorn.conf.py against the old shipped defaults
4
+ (gunicorn.conf.defaults.py) to detect user customizations, then replaces
5
+ the corresponding lines in the new version in place.
6
+
7
+ Settings that the user never changed get the new defaults automatically.
8
+ Settings the user explicitly changed are preserved at their original location.
9
+ """
10
+
11
+ import ast
12
+ import re
13
+
14
+
15
+ # Settings that can be safely merged across upgrades.
16
+ # Hooks, imports, and computed values are always taken from the new version.
17
+ MERGEABLE_SETTINGS = {
18
+ "bind",
19
+ "workers",
20
+ "timeout",
21
+ "max_requests",
22
+ "max_requests_jitter",
23
+ "preload_app",
24
+ "proc_name",
25
+ "accesslog",
26
+ "errorlog",
27
+ "loglevel",
28
+ "capture_output",
29
+ }
30
+
31
+
32
def _load_conf_values(path):
    """Extract literal top-level settings from a Python config file using AST.

    The file is parsed, never executed. Only top-level assignments to a single
    plain name listed in MERGEABLE_SETTINGS are considered, and only when the
    right-hand side is a literal that ``ast.literal_eval`` accepts. Anything
    else (function calls, imports, computed values) is ignored. Annotated
    assignments such as ``workers: int = 4`` are treated like plain ones.
    If a setting is assigned several times, the last literal assignment wins.

    Args:
        path: path to the config file to inspect.

    Returns:
        dict mapping setting name to its literal value.

    Raises:
        OSError: if the file cannot be opened.
        SyntaxError: if the file is not valid Python source.
    """
    # Read bytes so the parser applies Python's own source-encoding rules
    # (UTF-8 by default, or a PEP 263 coding cookie) instead of whatever the
    # process locale happens to be.
    with open(path, "rb") as f:
        tree = ast.parse(f.read(), filename=path)

    values = {}
    for node in tree.body:
        if isinstance(node, ast.Assign) and len(node.targets) == 1:
            target = node.targets[0]
        elif isinstance(node, ast.AnnAssign) and node.value is not None:
            target = node.target
        else:
            continue
        if not isinstance(target, ast.Name) or target.id not in MERGEABLE_SETTINGS:
            continue
        try:
            values[target.id] = ast.literal_eval(node.value)
        except (ValueError, TypeError):
            # Not a literal (e.g. min(cpu_count(), 4)) — skip, can't merge
            pass
    return values
57
+
58
+
59
+ def _load_conf_names(path):
60
+ """Extract all top-level assignment names from a config file.
61
+
62
+ Unlike _load_conf_values, this returns names even for non-literal values
63
+ (e.g. min(cpu_count(), 4)), so we can detect which settings exist in the file.
64
+ """
65
+ with open(path) as f:
66
+ tree = ast.parse(f.read(), filename=path)
67
+ names = set()
68
+ for node in ast.iter_child_nodes(tree):
69
+ if not isinstance(node, ast.Assign):
70
+ continue
71
+ if len(node.targets) != 1 or not isinstance(node.targets[0], ast.Name):
72
+ continue
73
+ names.add(node.targets[0].id)
74
+ return names
75
+
76
+
77
+ def _replace_setting_in_file(filepath, name, value):
78
+ """Replace a setting's value in-place in a Python config file.
79
+
80
+ Finds the line matching `name = ...` and replaces it with the new value.
81
+ """
82
+ with open(filepath) as f:
83
+ content = f.read()
84
+
85
+ # Match the assignment line: `name = <anything>` (not inside a comment)
86
+ pattern = re.compile(rf'^({re.escape(name)}\s*=\s*).*$', re.MULTILINE)
87
+ replacement = rf'\g<1>{value!r}'
88
+ new_content, count = pattern.subn(replacement, content)
89
+
90
+ if count > 0:
91
+ with open(filepath, "w") as f:
92
+ f.write(new_content)
93
+ return True
94
+ return False
95
+
96
+
97
def upgrade_gunicorn_conf(old_conf, old_defaults, new_conf, new_defaults=None):
    """Upgrade gunicorn.conf.py preserving user customizations.

    The new conf and defaults files should already be in their final location
    (e.g. copied by install.sh). This function reads the OLD backups to detect
    customizations, then replaces the corresponding values in the new conf.

    A setting counts as customized when it is in MERGEABLE_SETTINGS, has a
    literal value in the old conf, and that value differs from the old
    default (or the old defaults had no literal value for it).

    Args:
        old_conf: path to backup of the previously installed gunicorn.conf.py
        old_defaults: path to backup of the previously installed gunicorn.conf.defaults.py
        new_conf: path to the new gunicorn.conf.py (already in place)
        new_defaults: path to the new gunicorn.conf.defaults.py (already in place).
            When omitted, or when no top-level names are found in it, no
            filtering against the new defaults is applied.

    Returns:
        List of setting names that were preserved from the old config, in
        alphabetical order.
    """
    # Load values from the old files
    prev_defaults = _load_conf_values(old_defaults)
    prev_conf = _load_conf_values(old_conf)

    # Sentinel that compares unequal to every literal (including None), so a
    # setting absent from the old defaults always counts as customized.
    missing = object()

    # Find settings the user explicitly changed from the original defaults,
    # or added on top of them. Iterate in sorted order: MERGEABLE_SETTINGS is
    # a set, and its iteration order is not stable between interpreter runs.
    user_customizations = {
        key: prev_conf[key]
        for key in sorted(MERGEABLE_SETTINGS)
        if key in prev_conf and prev_conf[key] != prev_defaults.get(key, missing)
    }

    # Only preserve customizations for settings that exist in the new defaults.
    # Settings removed from the new defaults are intentionally dropped — even
    # if the user had customized them.
    new_default_names = _load_conf_names(new_defaults) if new_defaults else set()
    preserved = []
    for key, value in user_customizations.items():
        if new_default_names and key not in new_default_names:
            continue
        # The helper reports False when new_conf has no matching assignment
        # line; such settings are not reported as preserved.
        if _replace_setting_in_file(new_conf, key, value):
            preserved.append(key)

    return preserved
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: philologic
3
- Version: 5.2.0.2
3
+ Version: 5.2.2
4
4
  Summary: A concordance search engine for TEI-XML
5
5
  Author-email: Clovis Gladstone <clovisgladstone@artfl.uchicago.edu>
6
6
  License-Expression: GPL-3.0-or-later
@@ -58,4 +58,5 @@ philologic/utils/line_count.py
58
58
  philologic/utils/load_module.py
59
59
  philologic/utils/metadata_type_handler.py
60
60
  philologic/utils/pretty_print.py
61
- philologic/utils/sort.py
61
+ philologic/utils/sort.py
62
+ philologic/utils/upgrade_gunicorn_conf.py
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "philologic"
7
- version = "5.2.0.2"
7
+ version = "5.2.2"
8
8
  authors = [
9
9
  { name = "Clovis Gladstone", email = "clovisgladstone@artfl.uchicago.edu" },
10
10
  ]
File without changes
@@ -8,7 +8,6 @@ proxy, exact_cooc, sentence).
8
8
 
9
9
  import os
10
10
 
11
- import lmdb
12
11
  import numba
13
12
  import numpy as np
14
13
 
@@ -20,6 +19,8 @@ if not os.access(cache_dir, os.W_OK):
20
19
  os.environ["NUMBA_CACHE_DIR"] = cache_dir
21
20
  numba.config.CACHE_DIR = cache_dir
22
21
 
22
+ import lmdb
23
+
23
24
  from philologic.runtime.Query import (
24
25
  _find_doc_boundaries,
25
26
  _load_word_arrays,
@@ -901,7 +902,6 @@ def search_phrase(db_path, hitlist_filename, overflow_words, corpus_file=None):
901
902
  if not flushed:
902
903
  output_file.flush()
903
904
  flushed = True
904
-
905
905
  env.close()
906
906
 
907
907
 
@@ -1208,5 +1208,5 @@ def _search_two_groups_batched(db_path, hitlist_filename, word_groups, overflow_
1208
1208
  if not flushed:
1209
1209
  output_file.flush()
1210
1210
  flushed = True
1211
-
1212
1211
  env.close()
1212
+
File without changes