fstdtools 0.0.1__tar.gz → 0.0.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27) hide show
  1. fstdtools-0.0.3/PKG-INFO +142 -0
  2. fstdtools-0.0.3/README.md +129 -0
  3. fstdtools-0.0.3/VERSION +1 -0
  4. fstdtools-0.0.3/fstdtools/cli.py +389 -0
  5. fstdtools-0.0.3/fstdtools.egg-info/PKG-INFO +142 -0
  6. {fstdtools-0.0.1 → fstdtools-0.0.3}/fstdtools.egg-info/requires.txt +1 -1
  7. {fstdtools-0.0.1 → fstdtools-0.0.3}/pyproject.toml +2 -2
  8. fstdtools-0.0.1/PKG-INFO +0 -15
  9. fstdtools-0.0.1/README.md +0 -2
  10. fstdtools-0.0.1/VERSION +0 -1
  11. fstdtools-0.0.1/fstdtools/cli.py +0 -130
  12. fstdtools-0.0.1/fstdtools.egg-info/PKG-INFO +0 -15
  13. {fstdtools-0.0.1 → fstdtools-0.0.3}/LICENSE +0 -0
  14. {fstdtools-0.0.1 → fstdtools-0.0.3}/fstdtools/__init__.py +0 -0
  15. {fstdtools-0.0.1 → fstdtools-0.0.3}/fstdtools/__main__.py +0 -0
  16. {fstdtools-0.0.1 → fstdtools-0.0.3}/fstdtools/convert.py +0 -0
  17. {fstdtools-0.0.1 → fstdtools-0.0.3}/fstdtools/mdict/__init__.py +0 -0
  18. {fstdtools-0.0.1 → fstdtools-0.0.3}/fstdtools/mdict/lzo.py +0 -0
  19. {fstdtools-0.0.1 → fstdtools-0.0.3}/fstdtools/mdict/pureSalsa20.py +0 -0
  20. {fstdtools-0.0.1 → fstdtools-0.0.3}/fstdtools/mdict/readmdict.py +0 -0
  21. {fstdtools-0.0.1 → fstdtools-0.0.3}/fstdtools/mdict/ripemd128.py +0 -0
  22. {fstdtools-0.0.1 → fstdtools-0.0.3}/fstdtools/mdict/writemdict.py +0 -0
  23. {fstdtools-0.0.1 → fstdtools-0.0.3}/fstdtools.egg-info/SOURCES.txt +0 -0
  24. {fstdtools-0.0.1 → fstdtools-0.0.3}/fstdtools.egg-info/dependency_links.txt +0 -0
  25. {fstdtools-0.0.1 → fstdtools-0.0.3}/fstdtools.egg-info/entry_points.txt +0 -0
  26. {fstdtools-0.0.1 → fstdtools-0.0.3}/fstdtools.egg-info/top_level.txt +0 -0
  27. {fstdtools-0.0.1 → fstdtools-0.0.3}/setup.cfg +0 -0
@@ -0,0 +1,142 @@
1
+ Metadata-Version: 2.4
2
+ Name: fstdtools
3
+ Version: 0.0.3
4
+ Summary: CLI tools to compile/search/list/info fstd dictionary and convert mdx/mdd to fstdx/fstdd
5
+ Author-email: Moujie Qin <moujieqin@gmail.com>
6
+ Requires-Python: >=3.9
7
+ Description-Content-Type: text/markdown
8
+ License-File: LICENSE
9
+ Requires-Dist: fstd>=0.1.6
10
+ Requires-Dist: click>=8.0
11
+ Requires-Dist: tqdm>=4.64.0
12
+ Dynamic: license-file
13
+
14
+ # fstdtools
15
+ A command line tool to compile/search/list/info [fstd](https://github.com/MouJieQin/fstd) dictionary and convert mdx/mdd to fstdx/fstdd.
16
+
17
+ ## Install
18
+
19
+ ```
20
+ pip install fstdtools
21
+ ```
22
+
23
+ ## Usage
24
+
25
+ ### Compile
26
+
27
+ Convert mdx/mdd to fstdx/fstdd
28
+
29
+ ```
30
+ fstdtools write dict.mdx
31
+ fstdtools write dict.mdd
32
+ ```
33
+
34
+ Compile a raw txt file to fstdx
35
+
36
+ ```
37
+ fstdtools write dict.txt
38
+ ```
39
+
40
+ > 1. The first entry requires no preceding delimiter: write the entry word (key) directly, followed by its corresponding definition (value). Definitions can span multiple lines, but entry words must stay on a single line.
41
+ > 2. Starting from the second entry, each entry word (key) must be preceded by the delimiter `</>`. Entry words must still be written on one line, while definitions can span multiple lines.
42
+ > 3. The end of each complete entry (entry word + corresponding definition) must be marked with the delimiter `</>` as a closing tag.
43
+
44
+
45
+ ````
46
+ Ab
47
+ The definition of Ab
48
+ </>
49
+ Ababdeh
50
+ The definition of Ababdeh
51
+ </>
52
+ Abby
53
+ The definition of Abby
54
+ </>
55
+ ...
56
+ ````
57
+
58
+ Compile a directory to fstdd
59
+
60
+ ```
61
+ # data is a directory
62
+ fstdtools write data dict.fstdd
63
+ ```
64
+
65
+ Recompiling an fstdx from an existing fstdx file (e.g. with different compression settings) is supported; recompiling an fstdd from an fstdd is not supported yet.
66
+
67
+ ```
68
+ fstdtools -v write -b 8 -c 15 -d 130 -T "dict title" -D "dict description" dict.fstdx dict.15-8-130.fstdx
69
+ ```
70
+
71
+ ### Extract
72
+
73
+ Extract raw text from a fstdx
74
+
75
+ ```
76
+ fstdtools extract dict.fstdx
77
+ ```
78
+
79
+ Extract all files from a fstdd
80
+
81
+ ```
82
+ fstdtools extract dict.fstdd
83
+ ```
84
+
85
+ Extract a single file from a fstdd by key path
86
+
87
+ ```
88
+ fstdtools extract -k path/to/1.png dict.fstdd
89
+ ```
90
+
91
+ ### Search
92
+
93
+ Show the meta information of fstdx/fstdd
94
+
95
+ ```
96
+ fstdtools search -m dict.fstdx
97
+ fstdtools search -m dict.fstdd
98
+ ```
99
+
100
+ List all keys of an fstdx/fstdd
101
+
102
+ ```
103
+ fstdtools search -u dict.fstdx
104
+ fstdtools search -u dict.fstdd
105
+ ```
106
+
107
+ Query the value of a key from fstdx
108
+
109
+ ```
110
+ fstdtools search -k dictionary dict.fstdx
111
+ ```
112
+
113
+ Search a regex pattern
114
+
115
+ ```
116
+ fstdtools search -r 'd.*i.*on.*y$' dict.fstdx
117
+ ```
118
+
119
+ Spell-check a word
120
+
121
+ ```
122
+ fstdtools search -s 'condiction' dict.fstdx
123
+ ```
124
+
125
+ Fuzzy search based on edit distance
126
+
127
+ ```
128
+ fstdtools search -g 'testingsx' test.fstdx
129
+ ```
130
+
131
+ Prefix distance with prior suffix search from multiple fstdx files
132
+
133
+ ```
134
+ fstdtools search -P 2 -k 振り返ってるのは -x "する" -x "う" -x "く" -x "ぐ" -x "す" -x "つ" -x "ぬ" -x "ぶ" -x "む" -x "る" -x "い" -x "하다" -x "다" -f dict.fstdx 
135
+ ```
136
+
137
+ ## Reference
138
+
139
+ * https://github.com/MouJieQin/fstd
140
+ * https://github.com/liuyug/mdict-utils
141
+ * https://bitbucket.org/xwang/mdict-analysis
142
+ * https://github.com/zhansliu/writemdict
@@ -0,0 +1,129 @@
1
+ # fstdtools
2
+ A command line tool to compile/search/list/info [fstd](https://github.com/MouJieQin/fstd) dictionary and convert mdx/mdd to fstdx/fstdd.
3
+
4
+ ## Install
5
+
6
+ ```
7
+ pip install fstdtools
8
+ ```
9
+
10
+ ## Usage
11
+
12
+ ### Compile
13
+
14
+ Convert mdx/mdd to fstdx/fstdd
15
+
16
+ ```
17
+ fstdtools write dict.mdx
18
+ fstdtools write dict.mdd
19
+ ```
20
+
21
+ Compile a raw txt file to fstdx
22
+
23
+ ```
24
+ fstdtools write dict.txt
25
+ ```
26
+
27
+ > 1. The first entry requires no preceding delimiter: write the entry word (key) directly, followed by its corresponding definition (value). Definitions can span multiple lines, but entry words must stay on a single line.
28
+ > 2. Starting from the second entry, each entry word (key) must be preceded by the delimiter `</>`. Entry words must still be written on one line, while definitions can span multiple lines.
29
+ > 3. The end of each complete entry (entry word + corresponding definition) must be marked with the delimiter `</>` as a closing tag.
30
+
31
+
32
+ ````
33
+ Ab
34
+ The definition of Ab
35
+ </>
36
+ Ababdeh
37
+ The definition of Ababdeh
38
+ </>
39
+ Abby
40
+ The definition of Abby
41
+ </>
42
+ ...
43
+ ````
44
+
45
+ Compile a directory to fstdd
46
+
47
+ ```
48
+ # data is a directory
49
+ fstdtools write data dict.fstdd
50
+ ```
51
+
52
+ Recompiling an fstdx from an existing fstdx file (e.g. with different compression settings) is supported; recompiling an fstdd from an fstdd is not supported yet.
53
+
54
+ ```
55
+ fstdtools -v write -b 8 -c 15 -d 130 -T "dict title" -D "dict description" dict.fstdx dict.15-8-130.fstdx
56
+ ```
57
+
58
+ ### Extract
59
+
60
+ Extract raw text from a fstdx
61
+
62
+ ```
63
+ fstdtools extract dict.fstdx
64
+ ```
65
+
66
+ Extract all files from a fstdd
67
+
68
+ ```
69
+ fstdtools extract dict.fstdd
70
+ ```
71
+
72
+ Extract a single file from a fstdd by key path
73
+
74
+ ```
75
+ fstdtools extract -k path/to/1.png dict.fstdd
76
+ ```
77
+
78
+ ### Search
79
+
80
+ Show the meta information of fstdx/fstdd
81
+
82
+ ```
83
+ fstdtools search -m dict.fstdx
84
+ fstdtools search -m dict.fstdd
85
+ ```
86
+
87
+ List all keys of an fstdx/fstdd
88
+
89
+ ```
90
+ fstdtools search -u dict.fstdx
91
+ fstdtools search -u dict.fstdd
92
+ ```
93
+
94
+ Query the value of a key from fstdx
95
+
96
+ ```
97
+ fstdtools search -k dictionary dict.fstdx
98
+ ```
99
+
100
+ Search a regex pattern
101
+
102
+ ```
103
+ fstdtools search -r 'd.*i.*on.*y$' dict.fstdx
104
+ ```
105
+
106
+ Spell-check a word
107
+
108
+ ```
109
+ fstdtools search -s 'condiction' dict.fstdx
110
+ ```
111
+
112
+ Fuzzy search based on edit distance
113
+
114
+ ```
115
+ fstdtools search -g 'testingsx' test.fstdx
116
+ ```
117
+
118
+ Prefix distance with prior suffix search from multiple fstdx files
119
+
120
+ ```
121
+ fstdtools search -P 2 -k 振り返ってるのは -x "する" -x "う" -x "く" -x "ぐ" -x "す" -x "つ" -x "ぬ" -x "ぶ" -x "む" -x "る" -x "い" -x "하다" -x "다" -f dict.fstdx 
122
+ ```
123
+
124
+ ## Reference
125
+
126
+ * https://github.com/MouJieQin/fstd
127
+ * https://github.com/liuyug/mdict-utils
128
+ * https://bitbucket.org/xwang/mdict-analysis
129
+ * https://github.com/zhansliu/writemdict
@@ -0,0 +1 @@
1
+ 0.0.3
@@ -0,0 +1,389 @@
1
+ import click
2
+ import fstd
3
+ import json
4
+ from pathlib import Path
5
+ from .convert import convert
6
+ from importlib.metadata import version, PackageNotFoundError
7
+
8
+
9
def get_version():
    """Return the version of the installed ``fstdtools`` distribution.

    The version is looked up in the package metadata; when no such
    distribution is found the placeholder ``"0.0.0-unknown"`` is returned
    instead of raising.
    """
    try:
        installed = version("fstdtools")
    except PackageNotFoundError:
        installed = "0.0.0-unknown"
    return installed
14
+
15
+
16
def print_version(ctx, param, value):
    """Click callback for ``-V/--version``: print both versions and exit.

    Nothing happens when the flag was not supplied or when click is in
    resilient-parsing mode.  Otherwise a green
    ``fstdtools v<tool> | fstd core v<core>`` line is echoed and the
    context exits.
    """
    # ``value`` is tested first so ``ctx`` is never touched when the flag is absent.
    if value and not ctx.resilient_parsing:
        core_version = fstd.get_version()
        banner = f"fstdtools v{get_version()} | fstd core v{core_version}"
        click.echo(click.style(banner, fg="green"))
        ctx.exit()
22
+
23
+
24
def overwrite_confirm(ctx, file_path, yes):
    """Ask before overwriting ``file_path`` and abort the command if declined.

    No prompt is shown when ``yes`` is set or when ``file_path`` does not
    exist.  When the user declines, "Operation cancelled" is written to
    stderr and the click context exits with status 1.
    """
    # ``yes`` is checked first so the filesystem is not touched when it is set.
    if yes or not Path(file_path).exists():
        return
    question = click.style(f"File {file_path} already exists. Overwrite?", fg="yellow")
    if click.confirm(question, default=False):
        return
    click.echo("Operation cancelled", err=True)
    ctx.exit(code=1)
    # NOTE(review): only reached if ctx.exit() ever returns; callers in this
    # module ignore the return value — confirm the intended placement.
    return False
30
+
31
+
32
def print_search_result(ctx, res):
    """Print each element of ``res`` on its own line, then exit with status 0.

    The exit happens even when ``res`` is empty, so this call never returns
    to the caller in normal operation.
    """
    for entry in res:
        print(entry)
    ctx.exit(code=0)
36
+
37
+
38
+ # ===================== Global options =====================
39
@click.group(
    name="fstdtools",
    help="CLI tools for fstd dictionary to pack/unpack/list/info/convert.",
    context_settings={"help_option_names": ["-h", "--help"]},
)
@click.option(
    "-V",
    "--version",
    is_flag=True,
    callback=print_version,
    expose_value=False,
    is_eager=True,
    help="print version info and exit",
)
@click.option("--verbose", "-v", count=True, help="log level, -v simple log, -vv debug log")
@click.option(
    "--log-level",
    "-l",
    type=click.IntRange(min=0, max=6),
    default=4,
    show_default=True,
    envvar="FSTDTOOLS_LOG_LEVEL",
    help=" log_level: 0-6, 0 is trace, 1 is debug, 2 is info, 3 is warn, 4 is error, 5 is critical, 6 is off",
)
@click.pass_context
def cli(ctx, verbose, log_level):
    """Root command group: record the global options for the subcommands.

    ``verbose`` and ``log_level`` are stored in the ``ctx.obj`` dict, and
    ``log_level`` is also passed to ``fstd.set_log_level``.
    """
    shared = ctx.ensure_object(dict)
    shared.update(verbose=verbose, log_level=log_level)
    fstd.set_log_level(log_level)
50
+
51
+
52
+ # ===================== Subcommands extract =====================
53
@cli.command(name="extract", help="extract raw data from fstdx/fstdd")
@click.argument("fstd_file", type=click.Path(exists=True, file_okay=True, dir_okay=False, readable=True))
@click.argument("output_path", type=click.Path(file_okay=True, dir_okay=True, writable=True), required=False)
@click.option("-k", "--key-path", type=str, required=False, help="key path only for fstdd, e.g. 'folder1/folder2/file.png'")
@click.option("-y", "--yes", is_flag=True, help="overwrite output file, no confirm")
@click.pass_context
def extract(ctx, fstd_file, output_path, key_path, yes):
    """Extract the raw content of an fstdx or fstdd file.

    Args:
        ctx: click context, used for exiting with a status code.
        fstd_file: path of the ``.fstdx`` / ``.fstdd`` file to read.
        output_path: optional destination.  Defaults to the source path with
            a ``.txt`` suffix for ``.fstdx`` and to ``data`` next to the
            source for ``.fstdd``.
        key_path: for ``.fstdd`` only, extract just this entry instead of
            everything.
        yes: overwrite an existing destination without asking.

    Exits with status 0 on success and 1 on an invalid file, an unsupported
    suffix, a declined overwrite or a failed extraction.
    """
    src = Path(fstd_file)

    if src.suffix == ".fstdx":
        if not output_path:
            output_path = str(src.with_suffix(".txt"))
        # Single confirmation point for explicit and default destinations,
        # so the user is asked at most once.
        overwrite_confirm(ctx, output_path, yes)
        reader = fstd.FstdxReader(str(src.resolve()))
        if not reader.is_valid():
            click.echo(click.style(f"Invalid fstdx file {src}", fg="red"), err=True)
            ctx.exit(code=1)
        if reader.extract(output_path):
            click.echo(click.style(f"{str(Path(output_path).resolve())} extracted successfully", fg="bright_green"))
        else:
            ctx.exit(code=1)
    elif src.suffix == ".fstdd":
        if not output_path:
            output_path = str(src.parent / "data")
        overwrite_confirm(ctx, output_path, yes)
        reader = fstd.FstddReader(str(src.resolve()))
        if not reader.is_valid():
            click.echo(click.style(f"Invalid fstdd file {src}", fg="red"), err=True)
            ctx.exit(code=1)
        if key_path:
            # Only the requested entry is extracted below output_path.
            if reader.extract(key_path, output_path):
                click.echo(click.style(f"{str(Path(output_path)/key_path)} extracted successfully", fg="bright_green"))
            else:
                ctx.exit(code=1)
        else:
            if reader.extract_all(output_path):
                click.echo(click.style(f"{str(Path(output_path).resolve())} extracted successfully", fg="bright_green"))
            else:
                ctx.exit(code=1)
    else:
        click.echo(click.style(f"Invalid file type {src.suffix}", fg="red"), err=True)
        ctx.exit(code=1)
    ctx.exit(code=0)
102
+
103
+
104
+ # ===================== Subcommands write =====================
105
def _read_text_if_file(value):
    """Return the stripped UTF-8 content of ``value`` if it names a file, else ``value`` itself."""
    try:
        is_file = Path(value).is_file()
    except (OSError, ValueError):
        # Free text (e.g. a long description) may not even be a usable path.
        is_file = False
    if not is_file:
        return value
    return Path(value).read_text(encoding="utf-8").strip()


@cli.command(name="write", help="Compile from txt/fstdx/mdx to fstdx, from directory/mdd to fstdd")
@click.argument("source_file", type=click.Path(exists=True, file_okay=True, dir_okay=True, readable=True))
@click.argument("output_file", type=click.Path(file_okay=True, dir_okay=False, writable=True), required=False)
@click.option("-T", "--title", type=str, required=False, help="title text/file of the dictionary")
@click.option("-D", "--description", type=str, required=False, help="description text/file of the dictionary")
@click.option("-e", "--encoding", type=str, required=False, default="utf-8", help="encoding of the dictionary.", show_default=True)
@click.option("-m", "--meta", type=click.Path(exists=True, file_okay=True, dir_okay=False, readable=True), required=False, help="meta file(json) of the dictionary")
@click.option("-c", "--compress-level", type=click.IntRange(min=0, max=22), default=5, help="compression level 0(fast) ~ 22(max compress).", show_default=True)
@click.option("-d", "--compress-dict-size", type=click.IntRange(min=1, max=130), default=100, help="compression dict size, only for fstdx, 1~130.", show_default=True)
@click.option("-b", "--block-size", type=click.IntRange(min=4, max=512), default=4, help="block size, unit: KB.", show_default=True)
@click.option("-t", "--thread", type=int, default=0, help="concurrency thread count, auto detect cpu count if 0.", show_default=True)
@click.option("--substyle/--no-substyle", default=False, help="enable substyle, only for mdx/mdd to fstdx/fstdd.", show_default=True)
@click.option("-y", "--yes", is_flag=True, help="overwrite output file, no confirm")
@click.pass_context
def write(
    ctx, source_file, output_file, title, description, encoding, meta, compress_level, compress_dict_size, block_size, thread, substyle, yes
):
    """Compile ``source_file`` into an fstdx or fstdd dictionary.

    The source decides the output type:

    * a directory      -> ``.fstdd`` (``fstd.FstddWriter``)
    * ``.mdx``         -> ``.fstdx`` (``convert``)
    * ``.mdd``         -> ``.fstdd`` (``convert``)
    * anything else    -> ``.fstdx`` (treated as txt or fstdx input,
      ``fstd.FstdxWriter``)

    When ``output_file`` is omitted it is derived from the source path.
    ``title`` and ``description`` may each be literal text or the path of a
    UTF-8 file holding the text; they, together with the upper-cased
    ``encoding``, are merged into the metadata loaded from the optional
    ``meta`` JSON file.

    Exits with status 0 on success and 1 on invalid arguments or a declined
    overwrite.
    """
    verbose = ctx.obj["verbose"]
    src = Path(source_file)
    out = Path(output_file) if output_file else None

    if not src.exists():
        click.echo(click.style(f"Source file {src} does not exist", fg="red"), err=True)
        ctx.exit(code=1)

    if not meta:
        meta = {}
    elif not Path(meta).is_file():
        click.echo(click.style(f"Meta file {meta} does not exist", fg="red"), err=True)
        ctx.exit(code=1)
    else:
        meta_path = meta
        try:
            with open(meta_path, 'rt', encoding='utf-8') as meta_fp:
                meta = json.load(meta_fp)
        except json.JSONDecodeError:
            click.echo(click.style(f"Meta file {meta_path} is not valid json", fg="red"), err=True)
            ctx.exit(code=1)
        except Exception as e:
            click.echo(click.style(f"Meta file {meta_path} error: {e}", fg="red"), err=True)
            ctx.exit(code=1)
        # Keys are assigned on the metadata below, so it must be a mapping.
        if not isinstance(meta, dict):
            click.echo(click.style(f"Meta file {meta_path} must contain a json object", fg="red"), err=True)
            ctx.exit(code=1)

    if title:
        meta["Title"] = _read_text_if_file(title)

    if description:
        meta["Description"] = _read_text_if_file(description)

    if encoding:
        encoding = encoding.upper()
        meta["Encoding"] = encoding

    def show_verbose():
        if verbose >= 1:
            click.echo(click.style(f"Source file: {src}", fg="cyan"))
            click.echo(click.style(f"Output file: {output_file if output_file else 'default'}", fg="cyan"))
            click.echo(click.style(f"Compression level: {compress_level}", fg="cyan"))
            click.echo(click.style(f"Compression dict size: {compress_dict_size}", fg="cyan"))
            click.echo(click.style(f"Block size: {block_size}", fg="cyan"))
            click.echo(click.style(f"Concurrency threads: {thread if thread > 0 else 'auto detect cpu count'}", fg="cyan"))
            click.echo(click.style(f"Meta: {json.dumps(meta, ensure_ascii=False,indent=2) if meta else 'use default'}", fg="cyan"))

    # Classify the source once; the label is reused in the error message.
    if src.is_dir():
        source_kind, out_suffix = "directory", ".fstdd"
    elif src.suffix == ".mdx":
        source_kind, out_suffix = "mdx", ".fstdx"
    elif src.suffix == ".mdd":
        source_kind, out_suffix = "mdd", ".fstdd"
    else:
        # Anything else is treated as txt or fstdx and compiled to fstdx.
        source_kind, out_suffix = "txt or fstdx", ".fstdx"

    if out and out.suffix != out_suffix:
        click.echo(click.style(f"For {source_kind} source, output file must have {out_suffix} extension", fg="red"), err=True)
        ctx.exit(code=1)

    if not output_file:
        if source_kind == "txt or fstdx" and src.suffix == ".fstdx":
            # Recompiling an fstdx: append the suffix so the default output
            # does not collide with the source itself.
            output_file = str(src.resolve()) + ".fstdx"
        else:
            output_file = str(src.with_suffix(out_suffix))

    # Reject an in-place compile before asking about overwriting.
    if str(src.resolve()) == str(Path(output_file).resolve()):
        click.echo(click.style("Output file is same as source file, no conversion", fg="red"), err=True)
        ctx.exit(code=1)

    # Single confirmation point for explicit and default outputs.
    overwrite_confirm(ctx, output_file, yes)

    if source_kind == "directory":
        writer = fstd.FstddWriter()
        show_verbose()
        writer.compile_fstdd(source_file, output_file, json.dumps(meta), block_size, compress_level, thread, verbose >= 1)
    elif source_kind in ("mdx", "mdd"):
        show_verbose()
        # NOTE(review): title/description/meta are not forwarded to convert()
        # here — confirm that is intended.
        convert(source_file, output_file, compress_level, compress_dict_size, block_size, thread, substyle, None)
    else:
        writer = fstd.FstdxWriter()
        show_verbose()
        writer.compile_fstdx(source_file, output_file, json.dumps(meta), block_size, compress_level, compress_dict_size, thread, False, verbose >= 1)

    click.echo(click.style(f"{output_file} written successfully", fg="bright_green"))
    ctx.exit(code=0)
226
+
227
+
228
+ # ===================== Subcommands search =====================
229
@cli.command(name="search", help="Search in fstdx/fstdd dictionary")
@click.argument("fstd_file", required=False, type=click.Path(exists=True, file_okay=True, dir_okay=True, readable=True))
@click.option("-m", "--meta", is_flag=True, required=False, help="show meta information")
@click.option("-H", "--header", is_flag=True, required=False, help="show header information")
@click.option("-u", "--enumerate", is_flag=True, required=False, help="enumerate all keys in dictionary")
@click.option("-c", "--contains", type=str, required=False, help="check if key exists in dictionary")
@click.option("-k", "--key", type=str, required=False, help="show the value of key. (exact match)")
@click.option("-p", "--predictive", type=str, required=False, help="perform predictive search")
@click.option("-r", "--regex", type=str, required=False, help="Run regex pattern search")
@click.option("-s", "--spellcheck", type=str, required=False, help="Spell-check a word")
@click.option("-g", "--suggest", type=str, required=False, help="Get word suggestions for")
@click.option("-C", "--common-prefix", type=str, required=False, help="Search common prefix matches")
@click.option("-l", "--longest-prefix", type=str, required=False, help="Find longest common prefix")
@click.option("-e", "--edit-distance", type=int, required=False, help="Max edit distance for fuzzy search")
@click.option("-P", "--prefix-distance", type=int, required=False, help="Max distance for prefix distance search")
@click.option("-x", "--prior-suffix", type=str, multiple=True, required=False, help="Prior suffix only for prefix distance search")
@click.option("-f", "--dictionary", multiple=True, type=click.Path(exists=True, file_okay=True, dir_okay=False, readable=True), required=False, help="Add multiple .fstdx files for batch search")
@click.option("-t", "--thread", type=int, default=0, help="concurrency thread count, auto detect cpu count if 0.", show_default=True)
@click.pass_context
def search(ctx, fstd_file, meta, header, contains, key, predictive,
           enumerate, regex, spellcheck, suggest, common_prefix, longest_prefix, edit_distance, prefix_distance, prior_suffix, dictionary, thread):
    """Search in fstdx/fstdd dictionary.

    Two modes exist:

    * Batch mode (one or more ``-f/--dictionary`` files): every file is
      loaded into an ``fstd.FstdxSearcher`` and the first matching option in
      the order contains, predictive, regex, suggest, common-prefix,
      longest-prefix, edit-distance, prefix-distance, key is executed.
    * Single-file mode (``fstd_file``): the file suffix selects
      ``fstd.FstddReader`` or ``fstd.FstdxReader`` and the first matching
      option is executed.

    Edit-distance and prefix-distance searches need ``-k/--key`` as the
    query.  Prefix-distance search is only available in batch mode.

    Every path ends by exiting the click context: status 0 after printing a
    result, status 1 on an error or when no usable option was given.

    Note: the ``enumerate`` parameter shadows the builtin inside this
    function; the name comes from the ``--enumerate`` option.
    """
    if dictionary:
        searcher = fstd.FstdxSearcher(thread)
        if not searcher.is_valid():
            click.echo(click.style("Invalid thread count. Please use -t to specify a valid thread count.", fg="red"), err=True)
            ctx.exit(code=1)
        for dict_file in dictionary:
            # The file path is passed for both arguments of insert().
            if not searcher.insert(dict_file, dict_file):
                click.echo(click.style(f"Insert fstdx file {dict_file} failed.", fg="red"), err=True)
                ctx.exit(code=1)
        if contains:
            click.echo(click.style(f"{searcher.contains(contains, dictionary)}", fg="cyan"))
            ctx.exit(code=0)
        if predictive:
            print_search_result(ctx, searcher.predictive_search(predictive, dictionary))
        if regex:
            res = searcher.regex_search(regex, dictionary)
            # res is indexed as (matches, error); a truthy error is reported.
            if res[1]:
                click.echo(click.style(f"Regex error: {res[1]}", fg="red"), err=True)
                ctx.exit(code=1)
            print_search_result(ctx, res[0])
        if suggest:
            print_search_result(ctx, searcher.suggest(suggest, dictionary))
        if common_prefix:
            print_search_result(ctx, searcher.common_prefix_search(common_prefix, dictionary))
        if longest_prefix:
            print(longest_prefix[0:searcher.longest_prefix_len(longest_prefix, dictionary)])
            ctx.exit(code=0)
        if edit_distance:
            if not key:
                click.echo(click.style("Please use -k to specify a key.", fg="red"), err=True)
                ctx.exit(code=1)
            print_search_result(ctx, searcher.edit_distance_search(key, dictionary, edit_distance))
        if prefix_distance:
            if not key:
                click.echo(click.style("Please use -k to specify a key.", fg="red"), err=True)
                ctx.exit(code=1)
            if prior_suffix:
                searcher.insert_prior_suffix(prior_suffix)
            print_search_result(ctx, searcher.prefix_distance_search(key, dictionary, prefix_distance))
        if key:
            res = searcher.exact_match_search(key, dictionary)
            if not res:
                click.echo(click.style(f"Key {key} not found in dictionaries.", fg="red"), err=True)
                ctx.exit(code=1)
            else:
                for name, values in res.items():
                    click.echo(click.style(f"# {name}:", fg="cyan"))
                    click.echo(click.style("---", fg="cyan"))
                    for value in values:
                        print(value)
                        # NOTE(review): separator assumed to follow every
                        # value rather than only the last one — confirm.
                        click.echo(click.style("---", fg="cyan"))
                ctx.exit(code=0)
        click.echo(click.style("Invalid option to search in multiple fstdx files.", fg="red"), err=True)
        ctx.exit(code=1)
    if not fstd_file:
        click.echo(click.style("Please specify a fstdx/fstdd file.", fg="red"), err=True)
        ctx.exit(code=1)
    src = Path(fstd_file)
    if (src.suffix == ".fstdd"):
        reader = fstd.FstddReader(fstd_file)
        if not reader.is_valid():
            click.echo(click.style(f"Invalid fstdd file {fstd_file}", fg="red"), err=True)
            ctx.exit(code=1)
        if meta:
            click.echo(click.style(f"{json.dumps(json.loads(reader.get_meta()), ensure_ascii=False,indent=2)}", fg="cyan"))
            ctx.exit(code=0)
        if header:
            click.echo(click.style(f"{json.dumps(json.loads(reader.get_header()), ensure_ascii=False,indent=2)}", fg="cyan"))
            ctx.exit(code=0)
        if contains:
            click.echo(click.style(f"{reader.contains(contains)}", fg="cyan"))
            ctx.exit(code=0)
        if enumerate:
            all_keys = reader.extract_all_key()
            for key in all_keys:
                print(key)
            ctx.exit(code=0)
        click.echo(click.style("Invalid option to search in fstdd file.", fg="red"), err=True)
        ctx.exit(code=1)
    elif (src.suffix == ".fstdx"):
        reader = fstd.FstdxReader(fstd_file)
        if not reader.is_valid():
            click.echo(click.style(f"Invalid fstdx file {fstd_file}", fg="red"), err=True)
            ctx.exit(code=1)
        if meta:
            click.echo(click.style(f"{json.dumps(json.loads(reader.get_meta()), ensure_ascii=False,indent=2)}", fg="cyan"))
            ctx.exit(code=0)
        if header:
            click.echo(click.style(f"{json.dumps(json.loads(reader.get_header()), ensure_ascii=False,indent=2)}", fg="cyan"))
            ctx.exit(code=0)
        if contains:
            click.echo(click.style(f"{reader.contains(contains)}", fg="cyan"))
            ctx.exit(code=0)
        if enumerate:
            reader.enumerate_print()
            ctx.exit(code=0)
        if predictive:
            print_search_result(ctx, reader.predictive_search(predictive))
        if regex:
            res = reader.regex_search(regex, thread)
            if res[1]:
                click.echo(click.style(f"Regex error: {res[1]}", fg="red"), err=True)
                ctx.exit(code=1)
            print_search_result(ctx, res[0])
        if spellcheck:
            # NOTE(review): ``dictionary`` is always empty in single-file
            # mode; verify the second argument expected by spellcheck_word().
            print_search_result(ctx, reader.spellcheck_word(spellcheck, dictionary))
        if suggest:
            print_search_result(ctx, reader.suggest(suggest))
        if common_prefix:
            print_search_result(ctx, reader.common_prefix_search(common_prefix))
        if longest_prefix:
            print(longest_prefix[0:reader.longest_prefix_len(longest_prefix)])
            ctx.exit(code=0)
        if edit_distance:
            if not key:
                click.echo(click.style("Please use -k to specify a key.", fg="red"), err=True)
                ctx.exit(code=1)
            print_search_result(ctx, reader.edit_distance_search(key, edit_distance))
        if prefix_distance:
            # Checked before the exact-match lookup: a prefix-distance query
            # always carries -k, so testing it afterwards would silently run
            # an exact match instead of reporting the unsupported mode.
            click.echo(click.style("Prefix distance search not implemented to search in single fstdx. Use -f to search instead.", fg="red"), err=True)
            ctx.exit(code=1)
        if key:
            res = reader.exact_match_search(key)
            if res:
                print_search_result(ctx, res)
            else:
                click.echo(click.style(f"Key {key} not found in dictionary.", fg="red"), err=True)
                ctx.exit(code=1)
        click.echo(click.style("Invalid option to search in fstdx file.", fg="red"), err=True)
        ctx.exit(code=1)
    else:
        click.echo(click.style(f"Invalid file type {src.suffix}", fg="red"), err=True)
        ctx.exit(code=1)
386
+
387
+
388
# Run the click command group when this module is executed as a script.
if __name__ == "__main__":
    cli()
@@ -0,0 +1,142 @@
1
+ Metadata-Version: 2.4
2
+ Name: fstdtools
3
+ Version: 0.0.3
4
+ Summary: CLI tools to compile/search/list/info fstd dictionary and convert mdx/mdd to fstdx/fstdd
5
+ Author-email: Moujie Qin <moujieqin@gmail.com>
6
+ Requires-Python: >=3.9
7
+ Description-Content-Type: text/markdown
8
+ License-File: LICENSE
9
+ Requires-Dist: fstd>=0.1.6
10
+ Requires-Dist: click>=8.0
11
+ Requires-Dist: tqdm>=4.64.0
12
+ Dynamic: license-file
13
+
14
+ # fstdtools
15
+ A command line tool to compile/search/list/info [fstd](https://github.com/MouJieQin/fstd) dictionary and convert mdx/mdd to fstdx/fstdd.
16
+
17
+ ## Install
18
+
19
+ ```
20
+ pip install fstdtools
21
+ ```
22
+
23
+ ## Usage
24
+
25
+ ### Compile
26
+
27
+ Convert mdx/mdd to fstdx/fstdd
28
+
29
+ ```
30
+ fstdtools write dict.mdx
31
+ fstdtools write dict.mdd
32
+ ```
33
+
34
+ Compile a raw txt file to fstdx
35
+
36
+ ```
37
+ fstdtools write dict.txt
38
+ ```
39
+
40
+ > 1. The first entry requires no preceding delimiter: write the entry word (key) directly, followed by its corresponding definition (value). Definitions can span multiple lines, but entry words must stay on a single line.
41
+ > 2. Starting from the second entry, each entry word (key) must be preceded by the delimiter `</>`. Entry words must still be written on one line, while definitions can span multiple lines.
42
+ > 3. The end of each complete entry (entry word + corresponding definition) must be marked with the delimiter `</>` as a closing tag.
43
+
44
+
45
+ ````
46
+ Ab
47
+ The definition of Ab
48
+ </>
49
+ Ababdeh
50
+ The definition of Ababdeh
51
+ </>
52
+ Abby
53
+ The definition of Abby
54
+ </>
55
+ ...
56
+ ````
57
+
58
+ Compile a directory to fstdd
59
+
60
+ ```
61
+ # data is a directory
62
+ fstdtools write data dict.fstdd
63
+ ```
64
+
65
+ Recompiling an fstdx from an existing fstdx file (e.g. with different compression settings) is supported; recompiling an fstdd from an fstdd is not supported yet.
66
+
67
+ ```
68
+ fstdtools -v write -b 8 -c 15 -d 130 -T "dict title" -D "dict description" dict.fstdx dict.15-8-130.fstdx
69
+ ```
70
+
71
+ ### Extract
72
+
73
+ Extract raw text from a fstdx
74
+
75
+ ```
76
+ fstdtools extract dict.fstdx
77
+ ```
78
+
79
+ Extract all files from a fstdd
80
+
81
+ ```
82
+ fstdtools extract dict.fstdd
83
+ ```
84
+
85
+ Extract a single file from a fstdd by key path
86
+
87
+ ```
88
+ fstdtools extract -k path/to/1.png dict.fstdd
89
+ ```
90
+
91
+ ### Search
92
+
93
+ Show the meta information of fstdx/fstdd
94
+
95
+ ```
96
+ fstdtools search -m dict.fstdx
97
+ fstdtools search -m dict.fstdd
98
+ ```
99
+
100
+ List all keys of an fstdx/fstdd
101
+
102
+ ```
103
+ fstdtools search -u dict.fstdx
104
+ fstdtools search -u dict.fstdd
105
+ ```
106
+
107
+ Query the value of a key from fstdx
108
+
109
+ ```
110
+ fstdtools search -k dictionary dict.fstdx
111
+ ```
112
+
113
+ Search a regex pattern
114
+
115
+ ```
116
+ fstdtools search -r 'd.*i.*on.*y$' dict.fstdx
117
+ ```
118
+
119
+ Spell-check a word
120
+
121
+ ```
122
+ fstdtools search -s 'condiction' dict.fstdx
123
+ ```
124
+
125
+ Fuzzy search based on edit distance
126
+
127
+ ```
128
+ fstdtools search -g 'testingsx' test.fstdx
129
+ ```
130
+
131
+ Prefix distance with prior suffix search from multiple fstdx files
132
+
133
+ ```
134
+ fstdtools search -P 2 -k 振り返ってるのは -x "する" -x "う" -x "く" -x "ぐ" -x "す" -x "つ" -x "ぬ" -x "ぶ" -x "む" -x "る" -x "い" -x "하다" -x "다" -f dict.fstdx 
135
+ ```
136
+
137
+ ## Reference
138
+
139
+ * https://github.com/MouJieQin/fstd
140
+ * https://github.com/liuyug/mdict-utils
141
+ * https://bitbucket.org/xwang/mdict-analysis
142
+ * https://github.com/zhansliu/writemdict
@@ -1,3 +1,3 @@
1
- fstd>=0.1.3
1
+ fstd>=0.1.6
2
2
  click>=8.0
3
3
  tqdm>=4.64.0
@@ -6,11 +6,11 @@ build-backend = "setuptools.build_meta"
6
6
  name = "fstdtools"
7
7
  dynamic = ["version"]
8
8
  authors = [{ name = "Moujie Qin", email = "moujieqin@gmail.com" }]
9
- description = "CLI tools for fstd dictionary to pack/unpack/list/info/convert."
9
+ description = "CLI tools to compile/search/list/info fstd dictionary and convert mdx/mdd to fstdx/fstdd"
10
10
  requires-python = ">=3.9"
11
11
  license-files = ["LICENSE"]
12
12
  dependencies = [
13
- "fstd>=0.1.3",
13
+ "fstd>=0.1.6",
14
14
  "click>=8.0",
15
15
  "tqdm>=4.64.0"
16
16
  ]
fstdtools-0.0.1/PKG-INFO DELETED
@@ -1,15 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: fstdtools
3
- Version: 0.0.1
4
- Summary: CLI tools for fstd dictionary to pack/unpack/list/info/convert.
5
- Author-email: Moujie Qin <moujieqin@gmail.com>
6
- Requires-Python: >=3.9
7
- Description-Content-Type: text/markdown
8
- License-File: LICENSE
9
- Requires-Dist: fstd>=0.1.3
10
- Requires-Dist: click>=8.0
11
- Requires-Dist: tqdm>=4.64.0
12
- Dynamic: license-file
13
-
14
- # fstdtools
15
- A command line tool for fstd dictionary to pack/unpack/list/info/convert.
fstdtools-0.0.1/README.md DELETED
@@ -1,2 +0,0 @@
1
- # fstdtools
2
- A command line tool for fstd dictionary to pack/unpack/list/info/convert.
fstdtools-0.0.1/VERSION DELETED
@@ -1 +0,0 @@
1
- 0.0.1
@@ -1,130 +0,0 @@
1
- import click
2
- import fstd
3
- from pathlib import Path
4
- from .convert import convert as converter
5
- from importlib.metadata import version, PackageNotFoundError
6
-
7
-
8
def get_version():
    """Return the installed fstdtools version string.

    Looks the distribution up through ``importlib.metadata``; when no
    installed distribution named "fstdtools" is found, the placeholder
    "0.0.0-unknown" is returned instead of raising.
    """
    try:
        installed = version("fstdtools")
    except PackageNotFoundError:
        installed = "0.0.0-unknown"
    return installed
13
-
14
-
15
def print_version(ctx, param, value):
    """Click option callback that prints version information and exits.

    Args:
        ctx: click context; ``resilient_parsing`` is consulted and ``exit()``
            is called once the banner has been printed.
        param: the option that triggered the callback (unused).
        value: the flag value; nothing happens when it is falsy.
    """
    wants_banner = value and not ctx.resilient_parsing
    if wants_banner:
        core_version = fstd.get_version()
        banner = f"fstdtools v{get_version()} | fstd core v{core_version}"
        click.echo(click.style(banner, fg="green"))
        ctx.exit()
21
-
22
-
23
def overwrite_confirm(ctx, file_path):
    """Ask for confirmation before an existing file is overwritten.

    Args:
        ctx: click context, used to exit with code 1 when the user declines.
        file_path: path of the file that is about to be written.

    Nothing is asked when ``file_path`` does not exist.

    NOTE(review): indentation was lost in this listing, so the nesting below
    is reconstructed. ``return False`` is assumed to belong to the "declined"
    branch -- confirm against the real file.
    """
    if Path(file_path).exists():
        # The prompt defaults to "no" (default=False).
        if not click.confirm(click.style(f"File {file_path} already exists. Overwrite?", fg="yellow"), default=False):
            click.echo("Operation cancelled", err=True)
            ctx.exit(code=1)
            # Presumably never reached, since click's Context.exit raises -- TODO confirm.
            return False
29
-
30
-
31
- # ===================== Global options =====================
32
-
33
@click.group(
    name="fstdtools",
    help="CLI tools for fstd dictionary to pack/unpack/list/info/convert.",
    context_settings={"help_option_names": ["-h", "--help"]},
)
@click.option(
    "-V",
    "--version",
    is_flag=True,
    callback=print_version,
    expose_value=False,
    is_eager=True,
    help="print version info and exit",
)
@click.option(
    "--verbose",
    "-v",
    count=True,
    help="log level, -v simple log, -vv debug log",
)
@click.option(
    "--log-level",
    "-l",
    type=click.IntRange(min=0, max=6),
    default=4,
    show_default=True,
    envvar="FSTDTOOLS_LOG_LEVEL",
    help=" log_level: 0-6, 0 is trace, 1 is debug, 2 is info, 3 is warn, 4 is error, 5 is critical, 6 is off",
)
@click.pass_context
def cli(ctx, verbose, log_level):
    """Root command group: store the global options on the shared context.

    The verbosity count and the log level are saved in ``ctx.obj`` (a dict)
    under the keys "verbose" and "log_level", and the log level is also passed
    to ``fstd.set_log_level``.
    """
    shared = ctx.ensure_object(dict)
    shared.update(verbose=verbose, log_level=log_level)
    fstd.set_log_level(log_level)
44
-
45
-
46
- # ===================== Subcommands write =====================
47
-
48
@cli.command(name="write", help="from txt/fstdx/mdx to fstdx, from directory/mdd to fstdd")
@click.argument("source_file", type=click.Path(exists=True, file_okay=True, dir_okay=True, readable=True))
@click.argument("output_file", type=click.Path(file_okay=True, dir_okay=False, writable=True), required=False)
@click.option("-c", "--compress-level", type=click.IntRange(min=0, max=22), default=5, help="compression level 0(fast) ~ 22(max compress)")
@click.option("--compress-dict-size", type=click.IntRange(min=1, max=130), default=100, help="compression dict size, only for fstdx, 1~130, default 100")
@click.option("-b", "--block-size", type=click.IntRange(min=4, max=512), default=4, help="block size, default 4, unit KB")
@click.option("-t", "--thread", type=int, default=0, help="concurrency thread count, default 0, auto detect cpu count")
@click.option("--substyle/--no-substyle", default=False, help="enable substyle, only for mdx/mdd to fstdx/fstdd, default False")
@click.option("-y", "--yes", is_flag=True, help="overwrite output file, no confirm")
@click.pass_context
def convert_(
    ctx, source_file, output_file, compress_level, compress_dict_size, block_size, thread, substyle, yes
):
    """Compile or convert SOURCE_FILE into a fstdx/fstdd dictionary.

    The kind of source decides the target format:

    * a directory or a ``.mdd`` file produces a ``.fstdd`` file;
    * a ``.mdx`` file, or any other file (treated as txt/fstdx), produces a
      ``.fstdx`` file.

    Args:
        ctx: click context; ``ctx.obj["verbose"]`` holds the verbosity count.
        source_file: existing file or directory to read from.
        output_file: target path, or None to reuse the source path with the
            target extension.
        compress_level: compression level forwarded to the writer/converter.
        compress_dict_size: compression dictionary size (fstdx only).
        block_size: block size in KB.
        thread: worker thread count; 0 lets the backend decide.
        substyle: forwarded to the mdx/mdd converter.
        yes: when True, an existing output file is overwritten without asking.

    The command exits with code 1 when the output extension does not match the
    source kind or when the user declines to overwrite an existing file.
    """
    verbose = ctx.obj["verbose"]
    src = Path(source_file)

    if not src.exists():
        click.echo(click.style(f"Source file {src} does not exist", fg="red"), err=True)
        ctx.exit(code=1)

    # Decide the target extension once, instead of repeating the same
    # validation in every branch.
    if src.is_dir():
        source_kind, target_suffix = "directory", ".fstdd"
    elif src.suffix == ".mdx":
        source_kind, target_suffix = "mdx", ".fstdx"
    elif src.suffix == ".mdd":
        source_kind, target_suffix = "mdd", ".fstdd"
    else:
        # Anything else is treated as txt or fstdx input.
        source_kind, target_suffix = "txt or fstdx", ".fstdx"

    # Validate the extension before asking about overwriting, so the user is
    # never prompted for a file that would be rejected anyway.
    if output_file and Path(output_file).suffix != target_suffix:
        click.echo(
            click.style(
                f"For {source_kind} source, output file must have {target_suffix} extension",
                fg="red",
            ),
            err=True,
        )
        ctx.exit(code=1)

    if not output_file:
        # Always hand a plain string to the backends, whichever branch runs.
        # NOTE(review): for a ".fstdx" source this default equals the source
        # path itself -- confirm that compiling in place is intended.
        output_file = str(src.with_suffix(target_suffix))

    # Single overwrite prompt, skipped entirely when -y/--yes was given.
    if Path(output_file).exists() and not yes:
        question = click.style(f"File {output_file} already exists, overwrite?", fg="yellow")
        if not click.confirm(question, default=False):
            click.echo("Operation cancelled", err=True)
            ctx.exit(code=1)

    if verbose >= 1:
        thread_label = thread if thread > 0 else "auto detect cpu count"
        summary = (
            f"Source file: {src}",
            f"Output file: {output_file}",
            f"Compression level: {compress_level}",
            f"Compression dict size: {compress_dict_size}",
            f"Block size: {block_size}",
            f"Concurrency threads: {thread_label}",
        )
        for line in summary:
            click.echo(click.style(line, fg="cyan"))

    if src.is_dir():
        fstd.FstddWriter().compile_fstdd(
            source_file, output_file, "{}", block_size, compress_level, thread, verbose >= 1
        )
    elif src.suffix in (".mdx", ".mdd"):
        converter(
            source_file, output_file, compress_level, compress_dict_size, block_size, thread, substyle, None
        )
    else:
        fstd.FstdxWriter().compile_fstdx(
            source_file,
            output_file,
            "{}",
            block_size,
            compress_level,
            compress_dict_size,
            thread,
            False,
            verbose >= 1,
        )

    click.echo(click.style(f"{output_file} written successfully", fg="bright_green"))
127
-
128
-
129
# Invoke the click command group when this module is executed as a script.
if __name__ == "__main__":
    cli()
@@ -1,15 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: fstdtools
3
- Version: 0.0.1
4
- Summary: CLI tools for fstd dictionary to pack/unpack/list/info/convert.
5
- Author-email: Moujie Qin <moujieqin@gmail.com>
6
- Requires-Python: >=3.9
7
- Description-Content-Type: text/markdown
8
- License-File: LICENSE
9
- Requires-Dist: fstd>=0.1.3
10
- Requires-Dist: click>=8.0
11
- Requires-Dist: tqdm>=4.64.0
12
- Dynamic: license-file
13
-
14
- # fstdtools
15
- A command line tool for fstd dictionary to pack/unpack/list/info/convert.
File without changes
File without changes