nnpdf 4.1.0__py3-none-any.whl → 4.1.1__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (116)
  1. n3fit/backends/keras_backend/MetaModel.py +27 -26
  2. n3fit/backends/keras_backend/callbacks.py +16 -8
  3. n3fit/backends/keras_backend/internal_state.py +13 -2
  4. n3fit/backends/keras_backend/operations.py +26 -26
  5. n3fit/hyper_optimization/hyper_scan.py +3 -9
  6. n3fit/hyper_optimization/penalties.py +11 -8
  7. n3fit/hyper_optimization/rewards.py +65 -34
  8. n3fit/model_gen.py +344 -270
  9. n3fit/model_trainer.py +71 -105
  10. n3fit/performfit.py +2 -7
  11. n3fit/tests/regressions/quickcard_1.json +12 -28
  12. n3fit/tests/regressions/quickcard_3.json +12 -28
  13. n3fit/tests/regressions/quickcard_pol_1.json +10 -26
  14. n3fit/tests/regressions/quickcard_pol_3.json +9 -25
  15. n3fit/tests/regressions/quickcard_qed_1.json +11 -27
  16. n3fit/tests/regressions/quickcard_qed_3.json +11 -27
  17. n3fit/tests/test_hyperopt.py +6 -12
  18. n3fit/tests/test_layers.py +6 -6
  19. n3fit/tests/test_modelgen.py +73 -24
  20. n3fit/tests/test_multireplica.py +52 -16
  21. n3fit/tests/test_penalties.py +7 -8
  22. n3fit/tests/test_preprocessing.py +2 -2
  23. n3fit/tests/test_vpinterface.py +5 -10
  24. n3fit/vpinterface.py +88 -44
  25. {nnpdf-4.1.0.dist-info → nnpdf-4.1.1.dist-info}/METADATA +9 -3
  26. {nnpdf-4.1.0.dist-info → nnpdf-4.1.1.dist-info}/RECORD +105 -67
  27. {nnpdf-4.1.0.dist-info → nnpdf-4.1.1.dist-info}/WHEEL +1 -1
  28. nnpdf_data/_version.py +1 -1
  29. nnpdf_data/commondata/ATLAS_2JET_7TEV_R06/metadata.yaml +16 -5
  30. nnpdf_data/commondata/ATLAS_TTBAR_13P6TEV_TOT/data.yaml +2 -0
  31. nnpdf_data/commondata/ATLAS_TTBAR_13P6TEV_TOT/kinematics.yaml +13 -0
  32. nnpdf_data/commondata/ATLAS_TTBAR_13P6TEV_TOT/metadata.yaml +51 -0
  33. nnpdf_data/commondata/ATLAS_TTBAR_13P6TEV_TOT/uncertainties.yaml +17 -0
  34. nnpdf_data/commondata/ATLAS_TTBAR_5TEV_TOT/data.yaml +2 -0
  35. nnpdf_data/commondata/ATLAS_TTBAR_5TEV_TOT/kinematics.yaml +13 -0
  36. nnpdf_data/commondata/ATLAS_TTBAR_5TEV_TOT/metadata.yaml +52 -0
  37. nnpdf_data/commondata/ATLAS_TTBAR_5TEV_TOT/uncertainties.yaml +22 -0
  38. nnpdf_data/commondata/ATLAS_WPWM_13P6TEV_TOT/data.yaml +3 -0
  39. nnpdf_data/commondata/ATLAS_WPWM_13P6TEV_TOT/kinematics.yaml +17 -0
  40. nnpdf_data/commondata/ATLAS_WPWM_13P6TEV_TOT/metadata.yaml +57 -0
  41. nnpdf_data/commondata/ATLAS_WPWM_13P6TEV_TOT/uncertainties.yaml +8 -0
  42. nnpdf_data/commondata/ATLAS_Z0_13P6TEV_TOT/data.yaml +2 -0
  43. nnpdf_data/commondata/ATLAS_Z0_13P6TEV_TOT/kinematics.yaml +9 -0
  44. nnpdf_data/commondata/ATLAS_Z0_13P6TEV_TOT/metadata.yaml +54 -0
  45. nnpdf_data/commondata/ATLAS_Z0_13P6TEV_TOT/uncertainties.yaml +7 -0
  46. nnpdf_data/commondata/CMS_1JET_8TEV/metadata.yaml +7 -1
  47. nnpdf_data/commondata/CMS_2JET_7TEV/metadata.yaml +16 -19
  48. nnpdf_data/commondata/CMS_TTBAR_13P6TEV_TOT/data.yaml +2 -0
  49. nnpdf_data/commondata/CMS_TTBAR_13P6TEV_TOT/kinematics.yaml +13 -0
  50. nnpdf_data/commondata/CMS_TTBAR_13P6TEV_TOT/metadata.yaml +51 -0
  51. nnpdf_data/commondata/CMS_TTBAR_13P6TEV_TOT/uncertainties.yaml +12 -0
  52. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/data_d2Sig_dmttBar_dyttBar.yaml +17 -0
  53. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/data_dSig_dmttBar.yaml +8 -0
  54. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/data_dSig_dpTt.yaml +8 -0
  55. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/data_dSig_dyt.yaml +11 -0
  56. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/filter.py +260 -0
  57. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/kinematics_d2Sig_dmttBar_dyttBar.yaml +193 -0
  58. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/kinematics_dSig_dmttBar.yaml +57 -0
  59. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/kinematics_dSig_dpTt.yaml +57 -0
  60. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/kinematics_dSig_dyt.yaml +81 -0
  61. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/metadata.yaml +114 -0
  62. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/rawdata/mtt_abs_parton.yaml +828 -0
  63. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/rawdata/mttytt-abs_parton.yaml +1899 -0
  64. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/rawdata/ptt_abs_parton.yaml +828 -0
  65. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/rawdata/submission.yaml +47 -0
  66. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/rawdata/yt_abs_parton.yaml +1179 -0
  67. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/uncertainties_d2Sig_dmttBar_dyttBar.yaml +2282 -0
  68. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/uncertainties_dSig_dmttBar.yaml +1256 -0
  69. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/uncertainties_dSig_dpTt.yaml +1256 -0
  70. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/uncertainties_dSig_dyt.yaml +1598 -0
  71. nnpdf_data/commondata/CMS_TTBAR_13TEV_35P9FB-1_TOT/data.yaml +2 -0
  72. nnpdf_data/commondata/CMS_TTBAR_13TEV_35P9FB-1_TOT/kinematics.yaml +13 -0
  73. nnpdf_data/commondata/CMS_TTBAR_13TEV_35P9FB-1_TOT/metadata.yaml +51 -0
  74. nnpdf_data/commondata/CMS_TTBAR_13TEV_35P9FB-1_TOT/uncertainties.yaml +17 -0
  75. nnpdf_data/commondata/CMS_TTBAR_5TEV_TOT/metadata.yaml +1 -1
  76. nnpdf_data/commondata/NNPDF_POS_2P24GEV/metadata.yaml +60 -0
  77. nnpdf_data/commondata/dataset_names.yml +6 -1
  78. nnpdf_data/theory_cards/41000010.yaml +42 -0
  79. nnpdf_data/theory_cards/41000011.yaml +43 -0
  80. nnpdf_data/theory_cards/41000012.yaml +43 -0
  81. nnpdf_data/theory_cards/41000013.yaml +42 -0
  82. nnpdf_data/theory_cards/41000014.yaml +43 -0
  83. nnpdf_data/theory_cards/41000015.yaml +43 -0
  84. validphys/_version.py +1 -1
  85. validphys/config.py +30 -10
  86. validphys/convolution.py +37 -14
  87. validphys/coredata.py +15 -5
  88. validphys/covmats.py +9 -2
  89. validphys/dataplots.py +1 -1
  90. validphys/filters.py +17 -3
  91. validphys/fkparser.py +11 -1
  92. validphys/gridvalues.py +1 -0
  93. validphys/hessian2mc.py +5 -5
  94. validphys/lhaindex.py +5 -0
  95. validphys/loader.py +1 -1
  96. validphys/n3fit_data.py +107 -61
  97. validphys/nnprofile_default.yaml +2 -1
  98. validphys/pineparser.py +12 -2
  99. validphys/scripts/postfit.py +4 -4
  100. validphys/scripts/vp_pdfrename.py +8 -9
  101. validphys/tests/conftest.py +6 -2
  102. validphys/tests/test_hessian2mc.py +7 -5
  103. validphys/utils.py +1 -0
  104. n3fit/tests/regressions/quickcard_pol/filter.yml +0 -80
  105. n3fit/tests/regressions/quickcard_pol/nnfit/input/lockfile.yaml +0 -111
  106. n3fit/tests/regressions/quickcard_pol/nnfit/replica_1/quickcard_pol.exportgrid +0 -572
  107. n3fit/tests/regressions/quickcard_pol/nnfit/replica_1/quickcard_pol.json +0 -71
  108. n3fit/tests/regressions/quickcard_pol/nnfit/replica_3/quickcard_pol.exportgrid +0 -615
  109. n3fit/tests/regressions/quickcard_pol/nnfit/replica_3/quickcard_pol.json +0 -71
  110. n3fit/tests/regressions/weights.weights.h5 +0 -0
  111. n3fit/tests/regressions/weights_pol.weights.h5 +0 -0
  112. n3fit/tests/test +0 -1
  113. nnpdf_data/theory_cards/40000099.yaml +0 -41
  114. nnpdf_data/theory_cards/40000099.yml +0 -41
  115. {nnpdf-4.1.0.dist-info → nnpdf-4.1.1.dist-info}/entry_points.txt +0 -0
  116. {nnpdf-4.1.0.dist-info → nnpdf-4.1.1.dist-info/licenses}/LICENSE +0 -0
validphys/covmats.py CHANGED
@@ -125,7 +125,7 @@ def covmat_from_systematics(
 
 def dataset_inputs_covmat_from_systematics(
     dataset_inputs_loaded_cd_with_cuts,
-    data_input,
+    data_input=None,
     use_weights_in_covmat=True,
     norm_threshold=None,
     _list_of_central_values=None,
@@ -186,10 +186,16 @@ def dataset_inputs_covmat_from_systematics(
     special_corrs = []
     block_diags = []
     weights = []
+
     if _list_of_central_values is None:
         # want to just pass None to systematic_errors method
         _list_of_central_values = [None] * len(dataset_inputs_loaded_cd_with_cuts)
 
+    if data_input is None:
+        if use_weights_in_covmat:
+            raise ValueError("if use_weights_in_covmat=True, ``data_input`` cannot be empty")
+        data_input = [None] * len(dataset_inputs_loaded_cd_with_cuts)
+
     for cd, dsinp, central_values in zip(
         dataset_inputs_loaded_cd_with_cuts, data_input, _list_of_central_values
     ):
@@ -199,7 +205,8 @@ def dataset_inputs_covmat_from_systematics(
         else:
             sys_errors = cd.systematic_errors(central_values)
         stat_errors = cd.stat_errors.to_numpy()
-        weights.append(np.full_like(stat_errors, dsinp.weight))
+        if use_weights_in_covmat and dsinp is not None:
+            weights.append(np.full_like(stat_errors, dsinp.weight))
         # separate out the special uncertainties which can be correlated across
         # datasets
         is_intra_dataset_error = sys_errors.columns.isin(INTRA_DATASET_SYS_NAME)
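The guard above makes ``data_input`` optional, so the covariance matrix can be assembled without per-dataset weights. A minimal standalone sketch of the same pattern; ``build_weights`` and its arguments are illustrative, not the validphys API:

import numpy as np

def build_weights(stat_errors_list, data_input=None, use_weights=True):
    # data_input may only be omitted when weights are not used,
    # mirroring the ValueError guard in the diff above
    if data_input is None:
        if use_weights:
            raise ValueError("if use_weights=True, data_input cannot be empty")
        data_input = [None] * len(stat_errors_list)
    weights = []
    for stat_errors, dsinp in zip(stat_errors_list, data_input):
        if use_weights and dsinp is not None:
            weights.append(np.full_like(stat_errors, dsinp.weight))
    return weights

# with use_weights=False, no dataset-input objects are needed at all
assert build_weights([np.ones(3)], use_weights=False) == []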
validphys/dataplots.py CHANGED
@@ -1422,7 +1422,7 @@ def plot_xq2(
     ... {'dataset': 'CMS_Z0_7TEV_DIMUON_2D'},
     ... {'dataset': 'CMS_WPWM_8TEV_MUON_Y', 'variant': 'legacy'},
     ... {'dataset': 'CMS_Z0J_8TEV_PT-Y', 'cfac': ['NRM'], 'variant': 'legacy_10'},
-    ... {'dataset': 'CMS_2JET_7TEV_M12Y'},
+    ... {'dataset': 'CMS_2JET_7TEV_M12-Y', 'variant': 'legacy'},
     ... {'dataset': 'CMS_1JET_8TEV_PTY', 'variant': 'legacy'},
     ... {'dataset': 'CMS_TTBAR_7TEV_TOT_X-SEC', 'variant': 'legacy'},
     ... {'dataset': 'CMS_TTBAR_8TEV_TOT_X-SEC', 'variant': 'legacy'},
validphys/filters.py CHANGED
@@ -125,13 +125,18 @@ class FilterDefaults:
 class FilterRule:
     """
     Dataclass which carries the filter rule information.
+
     """
 
     dataset: str = None
     process_type: str = None
     rule: str = None
-    reason: str = None
-    local_variables: Mapping[str, Union[str, float]] = None
+    reason: str = dataclasses.field(
+        default=None, hash=False, compare=False
+    )  # Not relevant for hashing
+    local_variables: Mapping[str, Union[str, float]] = dataclasses.field(
+        default=None, hash=False
+    )  # Avoid hash issues with caching
     PTO: str = None
     FNS: str = None
     IC: str = None
@@ -163,9 +168,18 @@ def default_filter_rules_input():
     """
     Return a tuple of FilterRule objects.
     These are defined in ``filters.yaml`` in the ``validphys.cuts`` module.
+    Similarly to `parse_added_filter_rules`, this function checks if the rules
+    are unique, i.d. if there are no multiple rules for the same dataset of
+    process with the same rule (`reason` and `local_variables` are not hashed).
     """
     list_rules = yaml_safe.load(read_text(validphys.cuts, "filters.yaml"))
-    return tuple(FilterRule(**rule) for rule in list_rules)
+    unique_rules = set(FilterRule(**rule) for rule in list_rules)
+    if len(unique_rules) != len(list_rules):
+        raise RuleProcessingError(
+            "Detected repeated filter rules. Please, make sure that "
+            " rules are not repeated in `filters.yaml`."
+        )
+    return tuple(unique_rules)
 
 
 def check_nonnegative(var: str):
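The ``dataclasses.field(hash=False, compare=False)`` change is what enables the new set-based duplicate check: rules that differ only in ``reason`` or ``local_variables`` now hash (and compare) equal. A reduced, hypothetical example of the mechanism; ``MiniRule`` is not the real FilterRule:

import dataclasses

@dataclasses.dataclass(frozen=True)
class MiniRule:
    dataset: str = None
    rule: str = None
    # excluded from __hash__ and __eq__, like ``reason`` above
    reason: str = dataclasses.field(default=None, hash=False, compare=False)

a = MiniRule(dataset="DS1", rule="x > 0.1", reason="kinematic cut")
b = MiniRule(dataset="DS1", rule="x > 0.1", reason="different wording")
assert a == b and hash(a) == hash(b)  # treated as the same rule
assert len({a, b}) == 1               # so the duplicate check fires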
validphys/fkparser.py CHANGED
@@ -18,6 +18,8 @@ CFactors applied.
     res = load_fktable(fk)
 """
 
+# TODO: this module is deprecated and support for older theories is not guaranteed
+
 import dataclasses
 import functools
 import io
@@ -313,9 +315,17 @@ def parse_fktable(f):
         hadronic = res['GridInfo'].hadronic
         ndata = res['GridInfo'].ndata
         xgrid = res.pop('xGrid')
+        data_idx = sigma.index.get_level_values("data").unique().to_series()
 
         return FKTableData(
-            sigma=sigma, ndata=ndata, Q0=Q0, metadata=res, hadronic=hadronic, xgrid=xgrid
+            sigma=sigma,
+            ndata=ndata,
+            Q0=Q0,
+            metadata=res,
+            hadronic=hadronic,
+            xgrid=xgrid,
+            data_index=data_idx,
+            legacy=True,
         )
     elif header_name in _KNOWN_SEGMENTS:
         parser = _KNOWN_SEGMENTS[header_name]
validphys/gridvalues.py CHANGED
@@ -6,6 +6,7 @@ LHAPDF. The tools for representing these grids are in pdfgrids.py
 (the validphys provider module), and the
 basis transformations are in pdfbases.py
 """
+
 import itertools
 
 import numpy as np
validphys/hessian2mc.py CHANGED
@@ -6,14 +6,14 @@ like MSHT20 and CT18 to Monte Carlo sets.
 The functions implemented here follow equations (4.3) of the paper arXiv:2203.05506
 """
 
-import pathlib
-import lhapdf
-import os
 import logging
+import os
+
 import numpy as np
 
-from validphys.lhio import load_all_replicas, rep_matrix, write_replica
 from validphys.checks import check_pdf_is_hessian
+from validphys.lhaindex import get_lha_datapath
+from validphys.lhio import load_all_replicas, rep_matrix, write_replica
 
 log = logging.getLogger(__name__)
 
@@ -108,7 +108,7 @@ def write_hessian_to_mc_watt_thorne(pdf, mc_pdf_name, num_members, watt_thorne_r
     """
     hessian_set = pdf
 
-    lhapdf_path = pathlib.Path(lhapdf.paths()[-1])
+    lhapdf_path = get_lha_datapath()
 
     # path to hessian lhapdf set
     hessian_pdf_path = lhapdf_path / str(hessian_set)
validphys/lhaindex.py CHANGED
@@ -150,3 +150,8 @@ def get_index_path(folder=None):
         folder = get_lha_datapath()
     index_file = os.path.join(folder, 'pdfsets.index')
     return index_file
+
+
+def paths_prepend(new_path):
+    """Prepend a path to the LHAPDF list of paths so that it takes precedence."""
+    lhapdf.pathsPrepend(new_path.as_posix())
validphys/loader.py CHANGED
@@ -1168,7 +1168,7 @@ class RemoteLoader(LoaderBase):
         raise PDFNotFound("PDF '%s' is neither an uploaded fit nor an " "LHAPDF set." % name)
 
     def download_theoryID(self, thid):
-        thid = str(thid)
+        thid = str(int(thid))
         remote = self.remote_theories
         if thid not in remote:
            raise TheoryNotFound("Theory %s not available." % thid)
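The extra ``int`` cast normalizes the theory id before the remote lookup, presumably so that a float id (e.g. read from a YAML runcard) still maps onto the integer key used by the remote index; this reading is an assumption, the diff itself only shows the cast:

# hypothetical illustration, not validphys code
assert str(700.0) == "700.0"                   # old behaviour: key misses the index
assert str(int(700.0)) == str(700) == "700"    # new behaviour: normalized key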
validphys/n3fit_data.py CHANGED
@@ -22,6 +22,14 @@ from validphys.n3fit_data_utils import validphys_group_extractor
 log = logging.getLogger(__name__)
 
 
+class Hashrray(TupleComp):
+    """Wrapper class to hash a numpy array so it can be cached."""
+
+    def __init__(self, array):
+        self.array = array
+        super().__init__(hash(self.array.tobytes()))
+
+
 def _per_replica(f):
     """Decorator to be used on top of reportengine's decorators.
     It replaces the preparation step of the decorator with a custom function,
@@ -153,7 +161,7 @@ class _Masks(TupleComp):
         super().__init__(group_name, seed)
 
 
-def _diagonal_masks(
+def diagonal_masks(
     data, replica_trvlseed, dataset_inputs_fitting_covmat, diagonal_frac=1.0, threshold_eigvals=0
 ):
 
@@ -187,7 +195,7 @@ def _diagonal_masks(
     )
 
 
-def _standard_masks(data, replica_trvlseed):
+def standard_masks(data, replica_trvlseed):
     """Generate the boolean masks used to split data into training and
     validation points. Returns a list of 1-D boolean arrays, one for each
     dataset. Each array has length equal to N_data, the datapoints which
@@ -203,6 +211,7 @@ def _standard_masks(data, replica_trvlseed):
 
     trmask_partial = []
     vlmask_partial = []
+    nomasking = True
     for dataset in data.datasets:
         # TODO: python commondata will not require this rubbish.
         # all data if cuts are None
@@ -214,6 +223,8 @@ def _standard_masks(data, replica_trvlseed):
             continue
 
         frac = dataset.frac
+        # nomasking turns to False as soon as one frac is not equal to 1
+        nomasking &= frac == 1.0
         # We do this so that a given dataset will always have the same number of points masked
         trmax = int(ndata * frac)
         if trmax == 0:
@@ -224,6 +235,9 @@ def _standard_masks(data, replica_trvlseed):
         vl_mask = ~tr_mask
         trmask_partial.append(tr_mask)
         vlmask_partial.append(vl_mask)
+    # if we are not masking, remove the seed from the object
+    if nomasking:
+        replica_trvlseed = None
     return _Masks(str(data), replica_trvlseed, trmask_partial, vlmask_partial)
 
 
@@ -304,65 +318,24 @@ def fittable_datasets_masked(data):
     return validphys_group_extractor(data.datasets)
 
 
-def fitting_data_dict(
-    data,
-    make_replica,
-    dataset_inputs_loaded_cd_with_cuts,
-    dataset_inputs_fitting_covmat,
-    masks,
-    kfold_masks,
-    fittable_datasets_masked,
-    diagonal_basis=False,
-):
-    """
-    Provider which takes the information from validphys ``data``.
+def _hashed_dataset_inputs_fitting_covmat(dataset_inputs_fitting_covmat) -> Hashrray:
+    """Wrap the covmat into a Hashrray for caches to work"""
+    return Hashrray(dataset_inputs_fitting_covmat)
 
-    Returns
-    -------
-    all_dict_out: dict
-        Containing all the information of the experiment/dataset
-        for training, validation and experimental With the following keys:
 
-        'datasets'
-            list of dictionaries for each of the datasets contained in ``data``
-        'name'
-            name of the ``data`` - typically experiment/group name
-        'expdata_true'
-            non-replica data
-        'covmat'
-            full covmat
-        'invcovmat_true'
-            inverse of the covmat (non-replica)
-        'trmask'
-            mask for the training data
-        'invcovmat'
-            inverse of the covmat for the training data
-        'ndata'
-            number of datapoints for the training data
-        'expdata'
-            experimental data (replica'd) for training
-        'vlmask'
-            (same as above for validation)
-        'invcovmat_vl'
-            (same as above for validation)
-        'ndata_vl'
-            (same as above for validation)
-        'expdata_vl'
-            (same as above for validation)
-        'positivity'
-            bool - is this a positivity set?
-        'count_chi2'
-            should this be counted towards the chi2
-    """
-    # TODO: Plug in the python data loading when available. Including but not
-    # limited to: central values, ndata, replica generation, covmat construction
-    expdata_true = np.concatenate([d.central_values for d in dataset_inputs_loaded_cd_with_cuts])
-    expdata = make_replica
+@functools.lru_cache
+def _inv_covmat_prepared(masks, _hashed_dataset_inputs_fitting_covmat, diagonal_basis=False):
+    """Returns the inverse covmats for training, validation and total
+    attending to the right masks and whether it is diagonal or not.
 
-    covmat = dataset_inputs_fitting_covmat  # t0 covmat, or theory covmat or whatever was decided by the runcard
-    # TODO: use cholesky decomposition to get the inverse of the covariance matrix
-    inv_true = np.linalg.inv(covmat)
-    fittable_datasets = fittable_datasets_masked
+    Since the masks and number of datapoints need to be treated for 1-point datasets
+    it also returns the right ndata and masks for training and validation:
+
+    inv_total, inv_training, inv_validation, ndata_tr, ndata_vl, mask_tr, mask_vl, diagonal_rotation
+    """
+    covmat = _hashed_dataset_inputs_fitting_covmat.array
+    inv_total = np.linalg.inv(covmat)
+    diagonal_rotation = None
 
     if diagonal_basis:
         log.info("working in diagonal basis.")
@@ -372,7 +345,6 @@ def fitting_data_dict(
 
     # rotate the experimental data to the diagonal basis of the cormat and obtain training/validation masks
     diagonal_rotation = masks.diagonal_rotation
-    expdata = diagonal_rotation @ expdata
     tr_mask = masks.tr_masks[0]
     vl_mask = masks.vl_masks[0]
 
@@ -455,6 +427,80 @@ def fitting_data_dict(
     ndata_tr -= len(data_zero_tr)
     ndata_vl -= len(data_zero_vl)
 
+    return (
+        inv_total,
+        invcovmat_tr,
+        invcovmat_vl,
+        ndata_tr,
+        ndata_vl,
+        tr_mask,
+        vl_mask,
+        diagonal_rotation,
+    )
+
+
+def fitting_data_dict(
+    data,
+    make_replica,
+    dataset_inputs_loaded_cd_with_cuts,
+    dataset_inputs_fitting_covmat,
+    _inv_covmat_prepared,
+    kfold_masks,
+    fittable_datasets_masked,
+):
+    """
+    Provider which takes the information from validphys ``data``.
+
+    Returns
+    -------
+    all_dict_out: dict
+        Containing all the information of the experiment/dataset
+        for training, validation and experimental With the following keys:
+
+        'datasets'
+            list of dictionaries for each of the datasets contained in ``data``
+        'name'
+            name of the ``data`` - typically experiment/group name
+        'expdata_true'
+            non-replica data
+        'covmat'
+            full covmat
+        'invcovmat_true'
+            inverse of the covmat (non-replica)
+        'trmask'
+            mask for the training data
+        'invcovmat'
+            inverse of the covmat for the training data
+        'ndata'
+            number of datapoints for the training data
+        'expdata'
+            experimental data (replica'd) for training
+        'vlmask'
+            (same as above for validation)
+        'invcovmat_vl'
+            (same as above for validation)
+        'ndata_vl'
+            (same as above for validation)
+        'expdata_vl'
+            (same as above for validation)
+        'positivity'
+            bool - is this a positivity set?
+        'count_chi2'
+            should this be counted towards the chi2
+    """
+    # TODO: Plug in the python data loading when available. Including but not
+    # limited to: central values, ndata, replica generation, covmat construction
+    expdata_true = np.concatenate([d.central_values for d in dataset_inputs_loaded_cd_with_cuts])
+    expdata = make_replica
+    fittable_datasets = fittable_datasets_masked
+
+    inv_true, invcovmat_tr, invcovmat_vl, ndata_tr, ndata_vl, tr_mask, vl_mask, diag_rot = (
+        _inv_covmat_prepared
+    )
+
+    if diag_rot is not None:
+        expdata = diag_rot @ expdata
+
     expdata_tr = expdata[tr_mask].reshape(1, -1)
     expdata_vl = expdata[vl_mask].reshape(1, -1)
 
@@ -477,7 +523,7 @@ def fitting_data_dict(
         "name": str(data),
         "expdata_true": expdata_true.reshape(1, -1),
         "invcovmat_true": inv_true,
-        "covmat": covmat,
+        "covmat": dataset_inputs_fitting_covmat,
         "trmask": tr_mask,
         "invcovmat": invcovmat_tr,
         "ndata": ndata_tr,
@@ -489,7 +535,7 @@ def fitting_data_dict(
         "positivity": False,
         "count_chi2": True,
         "folds": folds,
-        "data_transformation": diagonal_rotation if diagonal_basis else None,
+        "data_transformation": diag_rot,
     }
     return dict_out
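The new ``Hashrray`` wrapper plus ``functools.lru_cache`` on ``_inv_covmat_prepared`` means the covmat inversion runs once per distinct matrix rather than once per replica: numpy arrays are unhashable, so they cannot be cache keys directly. A reduced, hypothetical sketch of the pattern (the real class derives from reportengine's TupleComp):

import functools
import numpy as np

class MiniHashrray:
    """Make a numpy array usable as a cache key via its raw bytes."""

    def __init__(self, array):
        self.array = array
        self._hash = hash(array.tobytes())

    def __hash__(self):
        return self._hash

    def __eq__(self, other):
        return hash(self) == hash(other)

@functools.lru_cache
def inverse(hashed_covmat):
    # the expensive inversion now runs once per distinct covmat
    return np.linalg.inv(hashed_covmat.array)

cov = np.eye(3) * 2.0
assert inverse(MiniHashrray(cov)) is inverse(MiniHashrray(cov.copy()))  # cache hit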
 
validphys/nnprofile_default.yaml CHANGED
@@ -37,7 +37,6 @@ ekos_path: ekos
 # Remote resource locations
 fit_urls:
   - 'https://data.nnpdf.science/fits/'
-  - 'https://nnpdf.web.cern.ch/nnpdf/fits/'
 
 fit_index: 'fitdata.json'
 
@@ -47,12 +46,14 @@ hyperscan_urls:
 hyperscan_index: 'hyperscandata.json'
 
 theory_urls:
+  - 'https://nnpdf.nikhef.nl/nnpdf/theories/'
   - 'https://nnpdf.web.cern.ch/nnpdf/tables/'
   - 'https://nnpdf.web.cern.ch/nnpdf/tables_box/'
 
 theory_index: 'theorydata.json'
 
 eko_urls:
+  - 'https://nnpdf.nikhef.nl/nnpdf/ekos/'
   - 'https://nnpdf.web.cern.ch/nnpdf/ekos/'
   - 'https://nnpdf.web.cern.ch/nnpdf/ekos_box/'
 
validphys/pineparser.py CHANGED
@@ -220,6 +220,7 @@ def pineappl_reader(fkspec):
 
     partial_fktables = []
     ndata = 0
+    full_data_index = []
     for fkname, p in zip(fknames, pines):
         # Start by reading possible cfactors if cfactor is not empty
         cfprod = 1.0
@@ -267,6 +268,7 @@ def pineappl_reader(fkspec):
         partial_fktables.append(pd.DataFrame(df_fktable, columns=lumi_columns, index=idx))
 
         ndata += n
+        full_data_index.append(data_idx)
 
     # Finallly concatenate all fktables, sort by flavours and fill any holes
     sigma = pd.concat(partial_fktables, sort=True, copy=False).fillna(0.0)
@@ -285,8 +287,15 @@ def pineappl_reader(fkspec):
         ndata = 1
 
     if ndata == 1:
-        # There's no doubt
-        protected = divisor == name
+        # When the number of points is 1 and the fktable is a divisor, protect it from cuts
+        if divisor == name:
+            protected = True
+            full_data_index = [[0]]
+
+    # Keeping the data index as a series is exploited to speed up convolutions
+    # see e.g., convolution.py::_gv_hadron_predictions
+    fid = np.concatenate(full_data_index)
+    data_index = pd.Series(fid, index=fid, name="data")
 
     return FKTableData(
         sigma=sigma,
@@ -297,4 +306,5 @@ def pineappl_reader(fkspec):
         hadronic=hadronic,
         xgrid=xgrid,
         protected=protected,
+        data_index=data_index,
     )
validphys/scripts/postfit.py CHANGED
@@ -22,12 +22,12 @@ import re
 import shutil
 import sys
 
-import lhapdf
-
 from reportengine import colors
 from validphys import fitdata, fitveto, lhio
 from validphys.core import PDF
 from validphys.fitveto import INTEG_THRESHOLD, NSIGMA_DISCARD_ARCLENGTH, NSIGMA_DISCARD_CHI2
+from validphys.lhaindex import paths_prepend
+from validphys.lhapdf_compatibility import make_pdf
 from validphys.loader import Loader
 from validphys.utils import tempfile_cleaner
 
@@ -218,13 +218,13 @@ def _postfit(
     log.info("Beginning construction of replica 0")
     # It's important that this is prepended, so that any existing instance of
     # `fitname` is not read from some other path
-    lhapdf.pathsPrepend(str(postfit_path))
+    paths_prepend(postfit_path)
     generatingPDF = PDF(fitname)
     lhio.generate_replica0(generatingPDF)
 
     # Test replica 0
     try:
-        lhapdf.mkPDF(fitname, 0)
+        make_pdf(fitname, 0)
     except RuntimeError as e:
         raise PostfitError("CRITICAL ERROR: Failure in reading replica zero") from e
 
validphys/scripts/vp_pdfrename.py CHANGED
@@ -1,12 +1,12 @@
 #!/usr/bin/env python
 """
-    vp-pdfrename - command line tool to rename LHAPDFs
+vp-pdfrename - command line tool to rename LHAPDFs
 
-    To obtain the PDF from an fit, simply run
-    vp-pdfrename <path-to-fit> <PDF name>. Optional flags allow for the
-    resulting pdf to be placed in the LHAPDF directory, as well as modifying
-    various fields of the info file. In addition, it is possible to compress
-    the resulting PDF also using tar archiving.
+To obtain the PDF from an fit, simply run
+vp-pdfrename <path-to-fit> <PDF name>. Optional flags allow for the
+resulting pdf to be placed in the LHAPDF directory, as well as modifying
+various fields of the info file. In addition, it is possible to compress
+the resulting PDF also using tar archiving.
 """
 
 import argparse
@@ -18,9 +18,8 @@ import sys
 import tarfile
 import tempfile
 
-import lhapdf
-
 from reportengine import colors
+from validphys.lhaindex import get_lha_datapath
 from validphys.renametools import rename_pdf
 from validphys.utils import yaml_rt
 
@@ -118,7 +117,7 @@ def main():
     log.addHandler(colors.ColorHandler())
 
     if args.lhapdf_path:
-        dest_path = pathlib.Path(lhapdf.paths()[-1]) / pdf_name
+        dest_path = get_lha_datapath() / pdf_name
     else:
         dest_path = source_path.with_name(pdf_name)
 
validphys/tests/conftest.py CHANGED
@@ -9,14 +9,18 @@ import pathlib
 import sys
 
 from hypothesis import settings
-import lhapdf
 import pytest
 
 # Adding this here to change the time of deadline from default (200ms) to 1500ms
 settings.register_profile("extratime", deadline=1500)
 settings.load_profile("extratime")
 
-lhapdf.setVerbosity(0)
+try:
+    import lhapdf
+
+    lhapdf.setVerbosity(0)
+except ModuleNotFoundError:
+    pass
 
 
 # Fortunately py.test works much like reportengine and providers are
validphys/tests/test_hessian2mc.py CHANGED
@@ -1,8 +1,10 @@
+import pathlib
+from unittest import mock
+
 import numpy as np
 import pandas as pd
-from unittest import mock
-from validphys.hessian2mc import write_mc_watt_thorne_replicas, write_hessian_to_mc_watt_thorne
-import pathlib
+
+from validphys.hessian2mc import write_hessian_to_mc_watt_thorne, write_mc_watt_thorne_replicas
 
 
 @mock.patch("validphys.hessian2mc.write_replica")
@@ -40,7 +42,7 @@ def test_write_mc_watt_thorne_replicas(mock_log_info, mock_write_replica):
 @mock.patch("validphys.hessian2mc.rep_matrix")
 @mock.patch("validphys.hessian2mc.write_new_lhapdf_info_file_from_previous_pdf")
 @mock.patch("validphys.hessian2mc.os.makedirs")
-@mock.patch("validphys.hessian2mc.lhapdf.paths")
+@mock.patch("validphys.hessian2mc.get_lha_datapath")
 def test_write_hessian_to_mc_watt_thorne(
     mock_lhapdf_paths,
     mock_makedirs,
@@ -56,7 +58,7 @@ def test_write_hessian_to_mc_watt_thorne(
 
     mock_load_all_replicas.return_value = (None, None)
 
-    mock_lhapdf_paths.return_value = [pathlib.Path("/path/to/lhapdf")]
+    mock_lhapdf_paths.return_value = pathlib.Path("/path/to/lhapdf")
 
     mock_rep_matrix.return_value = np.random.randn(5, 7)  # Mocked replica matrix
 
validphys/utils.py CHANGED
@@ -8,6 +8,7 @@ from ruamel.yaml import YAML
 
 yaml_safe = YAML(typ='safe')
 yaml_rt = YAML(typ='rt')
+yaml_rt.width = 2**31  # to prevent ruamel.yaml introducing linebreaks
 
 
 @contextlib.contextmanager
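Without the width override, ruamel.yaml folds long scalars at roughly 80 columns when round-tripping, which can silently reformat files it rewrites. A small standalone sketch of the effect, not validphys code:

import sys
from ruamel.yaml import YAML

yaml_rt = YAML(typ='rt')
yaml_rt.width = 2**31  # effectively disable line wrapping
yaml_rt.dump({"description": "x" * 200}, sys.stdout)  # stays on one line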