looper 1.5.0__py3-none-any.whl → 1.6.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
looper/html_reports.py DELETED
@@ -1,1057 +0,0 @@
1
- """ Generate HTML reports """
2
-
3
- import glob
4
- import logging
5
- import os
6
- import re
7
- import sys
8
- from copy import copy as cp
9
- from datetime import timedelta
10
-
11
- import jinja2
12
- import pandas as _pd
13
- from eido import read_schema
14
- from peppy.const import *
15
-
16
- from ._version import __version__ as v
17
- from .const import *
18
- from .processed_project import get_project_outputs
19
- from .utils import get_file_for_project_old
20
-
21
- _LOGGER = logging.getLogger("looper")
22
-
23
-
24
- class HTMLReportBuilderOld(object):
25
- """Generate HTML summary report for project/samples"""
26
-
27
def __init__(self, prj):
    """
    Bind the report builder to a project and resolve its output locations.

    :param Project prj: Project with which to work/operate on
    """
    super(HTMLReportBuilderOld, self).__init__()
    self.prj = prj
    self.j_env = get_jinja_env()
    # Both the reports directory and the summary page are resolved
    # through the same project-level path helper.
    reports_location = get_file_for_project_old(self.prj, "reports")
    self.reports_dir = reports_location
    summary_location = get_file_for_project_old(self.prj, "summary.html")
    self.index_html_path = summary_location
    self.index_html_filename = os.path.basename(summary_location)
    self._outdir = self.prj.output_dir
    _LOGGER.debug("Reports dir: {}".format(self.reports_dir))
-
42
def __call__(self, objs, stats, columns):
    """Build the complete HTML report and return the index page path."""
    # Navbar for the index page: links are made relative to the output dir
    index_links = self.create_navbar_links(objs=objs, stats=stats, wd=self._outdir)
    index_navbar = self.create_navbar(index_links, self.index_html_filename)

    # Navbar for pages stored in the reports dir: links are made relative
    # to that directory and the index page sits one level up
    reports_links = self.create_navbar_links(
        objs=objs, stats=stats, wd=self.reports_dir
    )
    reports_navbar = self.create_navbar(
        reports_links, os.path.join(os.pardir, self.index_html_filename)
    )

    page_footer = self.create_footer()
    return self.create_index_html(
        objs,
        stats,
        columns,
        footer=page_footer,
        navbar=index_navbar,
        navbar_reports=reports_navbar,
    )
-
63
def create_object_parent_html(self, objs, navbar, footer):
    """
    Render the parent page that lists every reported object type,
    each one linked to its individual object page

    :param pandas.DataFrame objs: project level dataframe containing any reported objects for all samples
    :param str navbar: HTML to be included as the navbar in the main summary page
    :param str footer: HTML to be included as the footer
    :return str: Rendered parent objects HTML file
    """
    parent_dir = os.path.dirname(os.path.join(self.reports_dir, "objects.html"))
    if not os.path.exists(parent_dir):
        os.makedirs(parent_dir)

    labels = []
    pages = []
    if not objs.empty:
        labels = list(objs["key"].drop_duplicates().sort_values())
        # Page names are the lower-cased keys with spaces replaced by
        # underscores, expressed relative to the reports directory
        pages = [
            os.path.relpath(
                os.path.join(
                    self.reports_dir, (label + ".html").replace(" ", "_").lower()
                ),
                self.reports_dir,
            )
            for label in labels
        ]

    return render_jinja_template(
        "navbar_list_parent.html",
        self.j_env,
        {
            "navbar": navbar,
            "footer": footer,
            "labels": labels,
            "pages": pages,
            "header": "Objects",
        },
    )
-
96
def create_sample_parent_html(self, navbar, footer):
    """
    Render the parent page that lists the project samples,
    each one linked to its individual sample page

    :param str navbar: HTML to be included as the navbar in the main summary page
    :param str footer: HTML to be included as the footer
    :return str: Rendered parent samples HTML file
    """
    parent_dir = os.path.dirname(os.path.join(self.reports_dir, "samples.html"))
    if not os.path.exists(parent_dir):
        os.makedirs(parent_dir)

    labels, pages = [], []
    for sample in self.prj.samples:
        label = str(sample.sample_name)
        # Only samples that have a directory in the results folder are listed
        if not os.path.exists(os.path.join(self.prj.results_folder, label)):
            continue
        target = os.path.join(
            self.reports_dir, (label + ".html").replace(" ", "_").lower()
        )
        pages.append(os.path.relpath(target, self.reports_dir))
        labels.append(label)

    return render_jinja_template(
        "navbar_list_parent.html",
        self.j_env,
        {
            "navbar": navbar,
            "footer": footer,
            "labels": labels,
            "pages": pages,
            "header": "Samples",
        },
    )
-
131
def create_navbar(self, navbar_links, index_html_relpath):
    """
    Render the navbar template with the provided links

    :param str navbar_links: HTML list of links to be inserted into a navbar
    :param str index_html_relpath: path to the index page, passed to the
        template as 'index_html'
    :return str: navbar HTML
    """
    return render_jinja_template(
        "navbar.html",
        self.j_env,
        {"navbar_links": navbar_links, "index_html": index_html_relpath},
    )
-
141
def create_footer(self):
    """
    Render the footer template, passing the package version to it

    :return str: footer HTML
    """
    footer_vars = {"version": v}
    return render_jinja_template("footer.html", self.j_env, footer_vars)
-
149
def create_navbar_links(
    self, objs, stats, wd=None, context=None, include_status=True
):
    """
    Return a string containing the navbar prebuilt html.

    Links to each page are generated relative to the directory of interest
    (wd arg) or built from the provided context (context arg).

    :param pandas.DataFrame objs: project results dataframe containing
        object data
    :param list[dict] stats: a summary file of pipeline statistics for each
        analyzed sample
    :param path wd: the working directory of the current HTML page being
        generated, enables navbar links relative to page
    :param list[str] context: the context the links will be used in.
        The sequence of directories to be prepended to the HTML file in
        the resulting navbar
    :param bool include_status: whether the status link should be included
        in the links set
    :return str: navbar links as HTML-formatted string
    :raises ValueError: when neither 'wd' nor 'context' is provided
    """
    if wd is None and context is None:
        raise ValueError(
            "Either 'wd' (path the links should be relative to) or 'context'"
            " (the context for the links) has to be provided."
        )

    def _reports_page_relpath(page):
        # Link to a page that lives directly in the reports directory
        return _make_relpath(
            file_name=os.path.join(self.reports_dir, page),
            wd=wd,
            context=context,
        )

    status_relpath = _reports_page_relpath("status.html")
    objects_relpath = _reports_page_relpath("objects.html")
    samples_relpath = _reports_page_relpath("samples.html")

    # Objects menu: a drop-down for up to 20 object types, otherwise a
    # single link to the objects parent page
    objects_target = None
    object_keys = None
    if objs is not None and not objs.dropna().empty:
        if len(objs["key"].drop_duplicates()) <= 20:
            objects_target, object_keys = _get_navbar_dropdown_data_objects(
                objs=objs, wd=wd, context=context, reports_dir=self.reports_dir
            )
        else:
            objects_target = objects_relpath

    # Samples menu: same rule, based on the number of stats entries
    samples_target = None
    sample_names = None
    if stats:
        if len(stats) <= 20:
            samples_target, sample_names = _get_navbar_dropdown_data_samples(
                stats=stats, wd=wd, context=context, reports_dir=self.reports_dir
            )
        else:
            samples_target = samples_relpath

    if include_status:
        status_page_name = "Status"
    else:
        status_page_name = None

    template_vars = {
        "status_html_page": status_relpath,
        "status_page_name": status_page_name,
        "dropdown_keys_objects": object_keys,
        "objects_page_name": "Objects",
        "samples_page_name": "Samples",
        "objects_html_page": objects_target,
        "samples_html_page": samples_target,
        "menu_name_objects": "Objects",
        "menu_name_samples": "Samples",
        "sample_names": sample_names,
        "all_samples": samples_relpath,
        "all_objects": objects_relpath,
    }
    return render_jinja_template("navbar_links.html", self.j_env, template_vars)
-
232
def create_object_html(self, single_object, navbar, footer):
    """
    Generates a page for an individual object type with all of its
    plots from each sample

    The page is written to the reports directory; nothing is returned.

    :param pandas.DataFrame single_object: contains reference
        information for an individual object type for all samples
    :param str navbar: HTML to be included as the navbar in the main summary page
    :param str footer: HTML to be included as the footer
    """
    keys = list(single_object["key"].drop_duplicates().sort_values())
    if not keys:
        # Without a key there is no page name to generate
        _LOGGER.debug("create_object_html: no object key found, nothing to render")
        return
    # The frame is expected to describe a single object type; should it
    # hold several keys, the last one in sorted order names the page.
    current_name = str(keys[-1])
    filename = current_name + ".html"
    page_filename = filename.replace(" ", "_").lower()
    html_page_path = os.path.join(self.reports_dir, page_filename)

    page_dir = os.path.dirname(html_page_path)
    if page_dir:
        os.makedirs(page_dir, exist_ok=True)

    def _warn_invalid_path(kind, row):
        # Report the sample that fails, if that information exists
        err_msg = "Sample: {} | " + "Missing valid " + kind + " path for: {}"
        if str(row["sample_name"]) and str(row["filename"]):
            _LOGGER.warning(err_msg.format(row["sample_name"], row["filename"]))
        else:
            _LOGGER.warning(err_msg.format("Unknown", "Unknown"))

    links = []
    figures = []
    missing_files = []
    for _, row in single_object.iterrows():
        # Set the path to the object file. Non-string cells (e.g. NaN)
        # make os.path.join raise, so the row is reported and left without
        # a path instead of reusing one from a previous row.
        try:
            object_path = os.path.join(
                self.prj.results_folder, row["sample_name"], row["filename"]
            )
            object_relpath = os.path.relpath(object_path, self.reports_dir)
        except (AttributeError, TypeError):
            _warn_invalid_path("object", row)
            object_path = ""
            object_relpath = ""

        # Set the path to the image/file.
        # If the anchor is not an image, the object itself is linked.
        if not str(row["anchor_image"]).lower().endswith(IMAGE_EXTS):
            image_path = object_path
        else:
            try:
                image_path = os.path.join(
                    self.prj.results_folder, row["sample_name"], row["anchor_image"]
                )
            except (AttributeError, TypeError):
                _LOGGER.warning(str(row))
                _warn_invalid_path("image", row)
                image_path = ""

        # Check for the presence of both the file and thumbnail
        if os.path.isfile(image_path) and os.path.isfile(object_path):
            image_relpath = os.path.relpath(image_path, self.reports_dir)
            _LOGGER.debug("Checking image path: {}".format(image_path))
            if str(image_path).lower().endswith(IMAGE_EXTS):
                # If the object has a valid image, use it!
                figures.append(
                    [object_relpath, str(row["sample_name"]), image_relpath]
                )
            else:
                # Or if that "image" is not an image, treat it as a link
                _LOGGER.debug("Got link")
                links.append([str(row["sample_name"]), image_relpath])
        else:
            missing_files.append(str(row["filename"]))

    if missing_files:
        _LOGGER.warning(
            "create_object_html: "
            + page_filename
            + " references nonexistent object files"
        )
        _LOGGER.debug(
            page_filename
            + " nonexistent files: "
            + ",".join(str(x) for x in missing_files)
        )
    template_vars = dict(
        navbar=navbar,
        footer=footer,
        name=current_name,
        figures=figures,
        links=links,
    )
    save_html(
        html_page_path,
        render_jinja_template("object.html", self.j_env, args=template_vars),
    )
-
336
def create_sample_html(self, objs, sample_name, sample_stats, navbar, footer):
    """
    Produce an HTML page containing all of a sample's objects
    and the sample summary statistics

    When the sample has no directory in the results folder, the page is
    still produced, with no file links, no objects and a "Missing" flag.

    :param pandas.DataFrame objs: project level dataframe containing
        any reported objects for all samples
    :param str sample_name: the name of the current sample
    :param dict sample_stats: pipeline run statistics for the current sample
    :param str navbar: HTML to be included as the navbar in the main summary page
    :param str footer: HTML to be included as the footer
    :return str: path to the produced HTML page, relative to the output dir
    """
    html_filename = sample_name + ".html"
    html_page = os.path.join(
        self.reports_dir, html_filename.replace(" ", "_").lower()
    )
    sample_page_relpath = os.path.relpath(html_page, self._outdir)
    single_sample = (
        _pd.DataFrame() if objs.empty else objs[objs["sample_name"] == sample_name]
    )
    page_dir = os.path.dirname(html_page)
    if page_dir:
        os.makedirs(page_dir, exist_ok=True)

    # Defaults, so that the template can always be rendered, even for a
    # sample that was not run through the pipeline
    stats_file_path = None
    profile_file_path = None
    commands_file_path = None
    log_file_path = None
    button_class = "btn btn-secondary"
    flag = "Missing"
    links = []
    figures = []
    missing_files = []

    results_folder = self.prj.results_folder
    sample_dir = os.path.join(results_folder, sample_name)
    if os.path.exists(sample_dir):
        if single_sample.empty:
            # When there are no reported objects, search for the
            # presence of log, profile, and command files
            log_name = _match_file_for_sample(sample_name, "log.md", results_folder)
            profile_name = _match_file_for_sample(
                sample_name, "profile.tsv", results_folder
            )
            command_name = _match_file_for_sample(
                sample_name, "commands.sh", results_folder
            )
        else:
            annotation = str(single_sample.iloc[0]["annotation"])
            log_name = annotation + "_log.md"
            profile_name = annotation + "_profile.tsv"
            command_name = annotation + "_commands.sh"
        stats_name = "stats.tsv"
        flags = _get_flags(sample_dir)

        def _link_to(file_name):
            # The file search above yields None when nothing matched
            if file_name is None:
                return None
            return _get_relpath_to_file(
                file_name, sample_name, results_folder, self.reports_dir
            )

        # get links to the files
        stats_file_path = _link_to(stats_name)
        profile_file_path = _link_to(profile_name)
        commands_file_path = _link_to(command_name)
        log_file_path = _link_to(log_name)

        if not flags:
            flag = "Missing"
        elif len(flags) > 1:
            flag = "Multiple"
        else:
            try:
                flag_dict = BUTTON_APPEARANCE_BY_FLAG[flags[0]]
            except KeyError:
                flag = "Unknown"
            else:
                button_class = flag_dict["button_class"]
                flag = flag_dict["flag"]

        # single_sample holds only this sample's rows (or is empty)
        for _, row in single_sample.iterrows():
            try:
                # Image thumbnails are optional
                image_path = os.path.join(
                    results_folder, sample_name, row["anchor_image"]
                )
                image_relpath = os.path.relpath(image_path, self.reports_dir)
            except (AttributeError, TypeError):
                image_path = ""
                image_relpath = ""

            # The "page" can be any kind of reported object
            try:
                page_path = os.path.join(
                    results_folder, sample_name, row["filename"]
                )
                page_relpath = os.path.relpath(page_path, self.reports_dir)
            except (AttributeError, TypeError):
                # No usable file name in this row: nothing to link
                missing_files.append(str(row["filename"]))
                continue

            has_page = os.path.isfile(page_path)
            has_thumbnail = os.path.isfile(image_path) and str(
                image_path
            ).lower().endswith((".png", ".jpg", ".jpeg", ".svg", ".gif"))
            if has_page and has_thumbnail:
                # If the object has a valid image, add as a figure
                figures.append([page_relpath, str(row["key"]), image_relpath])
            elif has_page:
                # Otherwise treat as a link
                links.append([str(row["key"]), page_relpath])
            else:
                # If no file present, there is no object by that name
                missing_files.append(str(row["filename"]))
    else:
        # Sample was not run through the pipeline
        _LOGGER.warning(
            "{} is not present in {}".format(sample_name, self.prj.results_folder)
        )

    if missing_files:
        _LOGGER.debug(
            "create_sample_html: nonexistent object files for sample '{}': {}".format(
                sample_name, ",".join(missing_files)
            )
        )

    template_vars = dict(
        navbar=navbar,
        footer=footer,
        sample_name=sample_name,
        stats_file_path=stats_file_path,
        profile_file_path=profile_file_path,
        commands_file_path=commands_file_path,
        log_file_path=log_file_path,
        button_class=button_class,
        sample_stats=sample_stats,
        flag=flag,
        links=links,
        figures=figures,
    )
    save_html(
        html_page, render_jinja_template("sample.html", self.j_env, template_vars)
    )
    return sample_page_relpath
-
483
def create_status_html(self, status_table, navbar, footer):
    """
    Render the status page from an already prepared status table.

    :param status_table: status table content, passed through to the
        'status.html' template
    :param str navbar: HTML to be included as the navbar in the main summary page
    :param str footer: HTML to be included as the footer
    :return str: rendered status HTML file
    """
    _LOGGER.debug("Building status page...")
    return render_jinja_template(
        "status.html",
        self.j_env,
        {"status_table": status_table, "navbar": navbar, "footer": footer},
    )
-
497
def create_project_objects(self):
    """
    Render available project level outputs defined in the
    pipeline output schemas

    Results of type "image" with an existing thumbnail are rendered as
    figures. Every other existing result, including an image without a
    usable thumbnail, is rendered as a link. Results whose file is not
    found are reported in a warning.

    :return str: rendered project objects HTML
    """
    _LOGGER.debug("Building project objects section...")
    figures = []
    links = []
    not_found = []
    # For each protocol report the project summarizers' results
    self.prj.populate_pipeline_outputs()
    # Check the interface files for summarizers
    for iface in self.prj.project_pipeline_interfaces:
        schema_paths = iface.get_pipeline_schemas(OUTPUT_SCHEMA_KEY)
        if schema_paths is None:
            _LOGGER.debug(
                "No project-level outputs defined in "
                "schema: {}".format(schema_paths)
            )
            continue
        if isinstance(schema_paths, str):
            schema_paths = [schema_paths]
        for output_schema_path in schema_paths:
            results = get_project_outputs(self.prj, read_schema(output_schema_path))
            for _name, result in results.items():
                title = str(result.setdefault("title", "No caption"))
                result_type = str(result["type"])
                result_file = str(result["path"])
                # Keep a missing thumbnail as None; str(None) would turn
                # it into the truthy "None" path
                thumbnail = result.setdefault("thumbnail_path", None)
                if not os.path.isabs(result_file):
                    result_file = os.path.join(self._outdir, result_file)
                _LOGGER.debug("Looking for project file: {}".format(result_file))

                # Confirm the file itself was produced
                file_matches = glob.glob(result_file)
                if not file_matches:
                    not_found.append("{} ({})".format(title, result_file))
                    continue
                file_relpath = os.path.relpath(str(file_matches[0]), self._outdir)

                thumbnail_matches = []
                if result_type == "image" and thumbnail:
                    # os.path.join leaves an absolute thumbnail path as is
                    thumbnail_matches = glob.glob(
                        os.path.join(self._outdir, str(thumbnail))
                    )
                if thumbnail_matches:
                    img_relpath = os.path.relpath(
                        str(thumbnail_matches[0]), self._outdir
                    )
                    figures.append([file_relpath, title, img_relpath])
                else:
                    # TODO: add more fine-grained type support?
                    #  not just image and link
                    links.append([title, file_relpath])
    if not_found:
        _LOGGER.warning("Not found: {}".format([str(x) for x in not_found]))
    _LOGGER.debug("collected project-level figures: {}".format(figures))
    _LOGGER.debug("collected project-level links: {}".format(links))
    template_vars = dict(figures=figures, links=links)
    return render_jinja_template("project_object.html", self.j_env, template_vars)
-
563
def create_index_html(
    self, objs, stats, col_names, navbar, footer, navbar_reports=None
):
    """
    Generate an index.html style project home page w/ sample summary
    statistics

    Besides the index page, this writes the individual sample pages,
    the object pages, and the samples/objects/status parent pages into
    the reports directory. Duplicated rows of 'objs' are dropped in place.

    :param pandas.DataFrame objs: project level dataframe containing
        any reported objects for all samples
    :param list[dict] stats: a summary file of pipeline statistics for each
        analyzed sample
    :param list col_names: all unique column names used in the stats file
    :param str navbar: HTML to be included as the navbar in the main summary page
    :param str footer: HTML to be included as the footer
    :param str navbar_reports: HTML to be included as the navbar for pages
        in the reports directory; defaults to 'navbar'
    :return str: path to the produced index page
    """
    _LOGGER.debug("Building index page...")
    # the sample_name column is processed differently (rendered as a link),
    # so it is left out of the plain data columns
    cols = [c for c in col_names if c != "sample_name"]
    if navbar_reports is None:
        navbar_reports = navbar
    if not objs.dropna().empty:
        objs.drop_duplicates(keep="last", inplace=True)
    # Generate parent index.html page path
    index_html_path = get_file_for_project_old(self.prj, "summary.html")

    # Add stats_summary.tsv button link
    stats_file_name = os.path.join(self._outdir, self.prj.name)
    if getattr(self.prj, "subproject", None):
        stats_file_name += "_" + self.prj.subproject
    stats_file_name += "_stats_summary.tsv"
    stats_file_path = os.path.relpath(stats_file_name, self._outdir)

    # Add stats summary table to index page and produce individual
    # sample pages; the table stays empty when there is no stats file
    table_row_data = []
    if os.path.isfile(stats_file_name):
        samples_cols_missing = []
        _LOGGER.debug(" * Creating sample pages...")
        for row in stats:
            sample_name = row["sample_name"]
            sample_page = self.create_sample_html(
                objs, sample_name, row, navbar_reports, footer
            )
            # treat sample_name column differently - provide a link to the sample page
            table_cell_data = [[sample_page, sample_name]]
            # for each column read the data from the stats
            for c in cols:
                try:
                    table_cell_data.append(str(row[c]))
                except KeyError:
                    table_cell_data.append("NA")
                    samples_cols_missing.append(sample_name)
            table_row_data.append(table_cell_data)
        _LOGGER.debug(
            "Samples with missing columns: {}".format(set(samples_cols_missing))
        )
    else:
        _LOGGER.warning("No stats file '%s'", stats_file_name)

    # Create parent samples page with links to each sample
    save_html(
        os.path.join(self.reports_dir, "samples.html"),
        self.create_sample_parent_html(navbar_reports, footer),
    )
    _LOGGER.debug(" * Creating object pages...")
    # Create objects pages
    if not objs.dropna().empty:
        for key in objs["key"].drop_duplicates().sort_values():
            single_object = objs[objs["key"] == key]
            self.create_object_html(single_object, navbar_reports, footer)

    # Create parent objects page with links to each object type
    save_html(
        os.path.join(self.reports_dir, "objects.html"),
        self.create_object_parent_html(objs, navbar_reports, footer),
    )
    # Create status page with each sample's status listed
    save_html(
        os.path.join(self.reports_dir, "status.html"),
        self.create_status_html(
            create_status_table(self.prj), navbar_reports, footer
        ),
    )
    # Add project level objects
    project_objects = self.create_project_objects()
    # Complete and close HTML file
    template_vars = dict(
        project_name=self.prj.name,
        stats_json=_read_tsv_to_json(stats_file_name),
        navbar=navbar,
        footer=footer,
        stats_file_path=stats_file_path,
        project_objects=project_objects,
        columns=col_names,
        table_row_data=table_row_data,
    )
    save_html(
        index_html_path,
        render_jinja_template("index.html", self.j_env, template_vars),
    )
    return index_html_path
-
674
-
675
def render_jinja_template(name, jinja_env, args=None):
    """
    Render template in the specified jinja environment using the provided args

    :param str name: name of the template
    :param jinja2.Environment jinja_env: the initialized environment to use
        in the looper HTML reports context
    :param dict args: arguments to pass to the template; no arguments
        are passed when omitted
    :return str: rendered template
    """
    # A fresh dict per call instead of a shared mutable default
    if args is None:
        args = {}
    assert isinstance(args, dict), "args has to be a dict"
    template = jinja_env.get_template(name)
    return template.render(**args)
-
688
-
689
def save_html(path, template):
    """
    Save rendered template as an HTML file

    Missing parent directories are created. A failure to write the file
    is logged as an error, not raised.

    :param str path: the desired location for the file to be produced
    :param str template: the template or just string
    """
    target_dir = os.path.dirname(path)
    # A bare file name has no directory part; os.makedirs("") would raise
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    try:
        with open(path, "w") as f:
            f.write(template)
    except IOError:
        _LOGGER.error("Could not write the HTML file: {}".format(path))
-
704
-
705
def get_jinja_env(templates_dirname=None):
    """
    Create jinja environment that loads templates from a directory

    :param str templates_dirname: path to the templates directory; when
        omitted, the '<TEMPLATES_DIRNAME>_old' directory next to this
        module is used
    :return jinja2.Environment: jinja environment
    """
    if templates_dirname is None:
        module_dir = os.path.dirname(os.path.realpath(__file__))
        templates_dirname = os.path.join(module_dir, f"{TEMPLATES_DIRNAME}_old")
    _LOGGER.debug("Using templates dir: " + templates_dirname)
    template_loader = jinja2.FileSystemLoader(templates_dirname)
    return jinja2.Environment(loader=template_loader)
-
718
-
719
def _get_flags(sample_dir):
    """
    Get the flag(s) present in the directory

    A flag is the lower-case word between the last underscore and the
    '.flag' extension of a file name. Flag files whose name does not
    follow that pattern are skipped with a warning.

    :param str sample_dir: path to the directory to be searched for flags
    :return list: flags found in the dir
    """
    assert os.path.exists(sample_dir), "The provided path ('{}') does not exist".format(
        sample_dir
    )
    # Escape the directory so that glob metacharacters in it match literally
    flag_files = glob.glob(os.path.join(glob.escape(sample_dir), "*.flag"))
    if len(flag_files) > 1:
        _LOGGER.warning(
            "Multiple flag files ({files_count}) found in sample dir '{sample_dir}'".format(
                files_count=len(flag_files), sample_dir=sample_dir
            )
        )
    if len(flag_files) == 0:
        _LOGGER.warning(
            "No flag files found in sample dir '{sample_dir}'".format(
                sample_dir=sample_dir
            )
        )
    flags = []
    for flag_file in flag_files:
        match = re.search(r"\_([a-z]+)\.flag$", os.path.basename(flag_file))
        if match is None:
            _LOGGER.warning(
                "Skipping flag file with unrecognized name: '{}'".format(flag_file)
            )
            continue
        flags.append(match.group(1))
    return flags
-
747
-
748
def _match_file_for_sample(sample_name, appendix, location, full_path=False):
    """
    Safely looks for files matching the appendix in the specified location for the sample

    When several files match, the first one in sorted order is returned,
    so the result does not depend on the file system listing order.

    :param str sample_name: name of the sample that the file name should be found for
    :param str appendix: the ending specific for the file
    :param str location: where to look for the file
    :param bool full_path: whether to return full path
    :return str: the name of the matched file, None if nothing matched
    """
    sample_dir = os.path.join(location, sample_name)
    # Only the file name part is a pattern; the directory is matched literally
    search_pattern = os.path.join(glob.escape(sample_dir), "*" + appendix)
    matches = sorted(glob.glob(search_pattern))
    if not matches:
        return None
    if len(matches) > 1:
        _LOGGER.warning(
            "matched mutiple files for '{}'. Returning the first one".format(
                search_pattern
            )
        )
    return matches[0] if full_path else os.path.basename(matches[0])
-
771
-
772
def _get_relpath_to_file(file_name, sample_name, location, relative_to):
    """
    Safely gets the relative path for the file for the specified sample

    :param str file_name: name of the file, may be None
    :param str sample_name: name of the sample that the file path should be found for
    :param str location: where to look for the file
    :param str relative_to: path the result path should be relative to
    :return str: a path to the file, None if no file name was given or
        the file does not exist
    """
    # Check for a missing name first: os.path.join rejects None
    if file_name is None:
        return None
    abs_file_path = os.path.join(location, sample_name, file_name)
    if not os.path.exists(abs_file_path):
        return None
    return os.path.relpath(abs_file_path, relative_to)
-
788
-
789
def _make_relpath(file_name, wd, context=None):
    """
    Create a path relative to the working directory, optionally prefixed
    with the context directories. This makes the navbar links usable
    outside of the native looper summary pages.

    :param str file_name: the path to make relative
    :param str wd: the dir the path should be relative to
    :param list[str] context: the context the links will be used in.
        The sequence of directories to be prepended to the HTML file in the resulting navbar
    :return str: relative path
    """
    relative = os.path.relpath(file_name, wd)
    if context:
        prefix = os.path.join(*context)
        return os.path.join(prefix, relative)
    return relative
-
803
-
804
def _get_navbar_dropdown_data_objects(objs, wd, context, reports_dir):
    """
    Collect the navbar drop-down data for the object pages

    :param pandas.DataFrame objs: dataframe with a 'key' column, may be None
    :param str wd: the dir the links should be relative to
    :param list[str] context: directories to be prepended to the links
    :param str reports_dir: directory the object pages are located in
    :return (list[str], pandas.Series): links to the object pages and the
        sorted unique keys, or a pair of None if no objects were provided
    """
    if objs is None:
        return None, None
    df_keys = objs["key"].drop_duplicates().sort_values()
    relpaths = [
        _make_relpath(
            os.path.join(reports_dir, (key + ".html").replace(" ", "_").lower()),
            wd,
            context,
        )
        for key in df_keys
    ]
    return relpaths, df_keys
-
814
-
815
def _get_navbar_dropdown_data_samples(stats, wd, context, reports_dir):
    """
    Collect the navbar drop-down data for the sample pages

    :param list[dict] stats: per-sample statistics, each expected to carry
        a 'sample_name' entry; may be None
    :param str wd: the dir the links should be relative to
    :param list[str] context: directories to be prepended to the links
    :param str reports_dir: directory the sample pages are located in
    :return (list[str], list[str]): links to the sample pages and the
        sample names, or a pair of None if no stats were provided
    """
    if stats is None:
        return None, None
    not_found = object()
    relpaths = []
    sample_names = []
    for sample in stats:
        # First value stored under the 'sample_name' entry, if any
        raw_name = next(
            (val for entry, val in sample.items() if entry == "sample_name"),
            not_found,
        )
        if raw_name is not_found:
            _LOGGER.warning("Could not determine sample name in stats.tsv")
            continue
        sample_name = str(raw_name)
        page_name = os.path.join(
            reports_dir, (sample_name + ".html").replace(" ", "_").lower()
        )
        relpaths.append(_make_relpath(page_name, wd, context))
        sample_names.append(sample_name)
    return relpaths, sample_names
-
834
-
835
- def _read_csv_encodings(path, encodings=["utf-8", "ascii"], **kwargs):
836
- """
837
- Try to read file with the provided encodings
838
-
839
- :param str path: path to file
840
- :param list encodings: list of encodings to try
841
- """
842
- idx = 0
843
- while idx < len(encodings):
844
- e = encodings[idx]
845
- try:
846
- t = _pd.read_csv(path, encoding=e, **kwargs)
847
- return t
848
- except UnicodeDecodeError:
849
- pass
850
- idx = idx + 1
851
- _LOGGER.warning(
852
- "Could not read the log file '{p}' with encodings '{enc}'".format(
853
- p=path, enc=encodings
854
- )
855
- )
856
-
857
-
858
- def _get_from_log(log_path, regex):
859
- """
860
- Get the value for the matched key from log file
861
-
862
- :param str log_path: path to the log file
863
- :param str regex: matching str. Should be formatted as follows: r'(phrase to match)'
864
- :return str: matched and striped string
865
- :raises IOError: when the file is not found in the provided path
866
- """
867
- if not os.path.exists(log_path):
868
- raise IOError("Can't read the log file '{}'. Not found".format(log_path))
869
- log = _read_csv_encodings(log_path, header=None, names=["data"])
870
- if log is None:
871
- _LOGGER.warning("'{r}' was not read from log".format(r=regex))
872
- return None
873
- # match regex, get row(s) that matched the regex
874
- log_row = log.iloc[:, 0].str.extractall(regex)
875
- # not matches? return None
876
- if log_row.empty:
877
- return None
878
- if log_row.size > 1:
879
- _LOGGER.warning(
880
- "When parsing '{lp}', more than one values matched with: {r}. Returning first.".format(
881
- lp=log_path, r=regex
882
- )
883
- )
884
- # split the matched line by first colon return stripped data.
885
- # This way both mem values (e.g 1.1GB) and time values (e.g 1:10:10) will work.
886
- val = log.iloc[log_row.index[0][0]].str.split(":", 1, expand=True)[1][0].strip()
887
- return val
888
-
889
-
890
- def _read_tsv_to_json(path):
891
- """
892
- Read a tsv file to a JSON formatted string
893
-
894
- :param path: to file path
895
- :return str: JSON formatted string
896
- """
897
- assert os.path.exists(path), "The file '{}' does not exist".format(path)
898
- _LOGGER.debug("Reading TSV from '{}'".format(path))
899
- df = _pd.read_csv(path, sep="\t", index_col=False, header=None)
900
- return df.to_json()
901
-
902
-
903
def uniqify(seq):
    """Return the items of seq as a list, duplicates dropped, first-seen order kept."""
    already_seen = set()
    unique_items = []
    for item in seq:
        if item in already_seen:
            continue
        already_seen.add(item)
        unique_items.append(item)
    return unique_items
909
-
910
-
911
def create_status_table(prj, final=True):
    """
    Create the status table, the core of the status page.

    It is abstracted into a function so that it can be used in other software
    packages. It can produce a table of two types: with links to the
    samples/log files and without. The one without can be used to render HTMLs
    for on-the-fly job status inspection.

    :param looper.Project prj: project to create the status table for
    :param bool final: if the status table is created for a finalized looper
        run. In such a case, links to samples and log files will be provided
    :return str: rendered status HTML file
    """
    fallback_class = "table-secondary"
    status_warning = False
    row_classes, flags = [], []
    sample_paths, sample_link_names = [], []
    log_paths, log_link_names = [], []
    times, mems = [], []
    sample_warning = []

    for sample in prj.samples:
        sample_name = str(sample.sample_name)
        sample_dir = os.path.join(prj.results_folder, sample_name)

        if not os.path.exists(sample_dir):
            # Sample was not run through the pipeline
            sample_warning.append(sample_name)
            continue

        # Resolve the status flag for the current sample into a label and a
        # row class; anything but exactly one known flag gets the fallback.
        found_flags = _get_flags(sample_dir)
        if not found_flags:
            button_class, flag = fallback_class, "Missing"
        elif len(found_flags) > 1:
            button_class, flag = fallback_class, "Multiple"
        else:
            single_flag = found_flags[0]
            try:
                appearance = TABLE_APPEARANCE_BY_FLAG[single_flag]
            except KeyError:
                button_class, flag = fallback_class, "Unknown"
            else:
                button_class = appearance["button_class"]
                flag = appearance["flag"]
        row_classes.append(button_class)

        # First column: sample name and link to the sample page
        page_name = sample_name + ".html"
        reports_location = get_file_for_project_old(prj, "reports")
        page_path = os.path.join(
            reports_location, page_name.replace(" ", "_").lower()
        )
        relative_to = get_file_for_project_old(prj, "reports")
        sample_paths.append(os.path.relpath(page_path, relative_to))
        sample_link_names.append(sample_name)

        # Second column: status/flag
        flags.append(flag)

        # Third column: log file name and link
        log_name = _match_file_for_sample(sample_name, "log.md", prj.results_folder)
        log_file_link = _get_relpath_to_file(
            log_name,
            sample_name,
            prj.results_folder,
            get_file_for_project_old(prj, "reports"),
        )
        log_link_names.append(log_name)
        log_paths.append(log_file_link)

        # Fourth (runtime) and fifth (memory) columns, from the profile file
        profile_file_path = _match_file_for_sample(
            sample.sample_name, "profile.tsv", prj.results_folder, full_path=True
        )
        if not os.path.exists(profile_file_path):
            _LOGGER.warning("'{}' does not exist".format(profile_file_path))
            times.append(NO_DATA_PLACEHOLDER)
            mems.append(NO_DATA_PLACEHOLDER)
            continue
        profile = _pd.read_csv(
            profile_file_path, sep="\t", comment="#", names=PROFILE_COLNAMES
        )
        profile["runtime"] = _pd.to_timedelta(profile["runtime"])
        times.append(_get_runtime(profile))
        mems.append(_get_maxmem(profile))

    # Alert the user to any warnings generated
    if status_warning:
        _LOGGER.warning(
            "The stats table is incomplete, likely because one or "
            "more jobs either failed or is still running."
        )
    if sample_warning:
        _LOGGER.warning(
            "{} samples not present in {}: {}".format(
                len(sample_warning),
                prj.results_folder,
                str(sample_warning),
            )
        )

    template_vars = {
        "sample_link_names": sample_link_names,
        "row_classes": row_classes,
        "flags": flags,
        "times": times,
        "mems": mems,
    }
    if final:
        # Only the finalized table carries links to sample pages and logs
        template_name = "status_table.html"
        template_vars.update(
            sample_paths=sample_paths,
            log_link_names=log_link_names,
            log_paths=log_paths,
        )
    else:
        template_name = "status_table_no_links.html"
    return render_jinja_template(template_name, get_jinja_env(), template_vars)
1033
-
1034
-
1035
- def _get_maxmem(profile_df):
1036
- """
1037
- Get current peak memory
1038
-
1039
- :param pandas.core.frame.DataFrame profile_df: a data frame representing the current profile.tsv for a sample
1040
- :return str: max memory
1041
- """
1042
- return "{} GB".format(
1043
- str(max(profile_df["mem"]) if not profile_df["mem"].empty else 0)
1044
- )
1045
-
1046
-
1047
- def _get_runtime(profile_df):
1048
- """
1049
- Collect the unique and last duplicated runtimes, sum them and then return in str format
1050
-
1051
- :param pandas.core.frame.DataFrame profile_df: a data frame representing the current profile.tsv for a sample
1052
- :return str: sum of runtimes
1053
- """
1054
- unique_df = profile_df[~profile_df.duplicated("cid", keep="last").values]
1055
- return str(
1056
- timedelta(seconds=sum(unique_df["runtime"].apply(lambda x: x.total_seconds())))
1057
- ).split(".")[0]